rust: run a clippy pass with the latest stable version...
Raphaël Gomès
r52013:532e74ad default
@@ -1,349 +1,349 b''
1 1 // layer.rs
2 2 //
3 3 // Copyright 2020
4 4 // Valentin Gatien-Baron,
5 5 // Raphaël Gomès <rgomes@octobus.net>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 use crate::errors::HgError;
11 11 use crate::exit_codes::CONFIG_PARSE_ERROR_ABORT;
12 12 use crate::utils::files::{get_bytes_from_path, get_path_from_bytes};
13 13 use format_bytes::{format_bytes, write_bytes, DisplayBytes};
14 14 use lazy_static::lazy_static;
15 15 use regex::bytes::Regex;
16 16 use std::collections::HashMap;
17 17 use std::path::{Path, PathBuf};
18 18
19 19 lazy_static! {
20 20 static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
21 21 static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
22 22 /// Continuation whitespace
23 23 static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
24 24 static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
25 25 static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
26 26 /// A directive that allows for removing previous entries
27 27 static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
28 28 /// A directive that allows for including other config files
29 29 static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
30 30 }
31 31
32 32 /// All config values separated by layers of precedence.
33 33 /// Each config source may be split in multiple layers if `%include` directives
34 34 /// are used.
35 35 /// TODO detail the general precedence
36 36 #[derive(Clone)]
37 37 pub struct ConfigLayer {
38 38 /// Mapping of the sections to their items
39 39 sections: HashMap<Vec<u8>, ConfigItem>,
40 40 /// All sections (and their items/values) in a layer share the same origin
41 41 pub origin: ConfigOrigin,
42 42 /// Whether this layer comes from a trusted user or group
43 43 pub trusted: bool,
44 44 }
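To illustrate the layering described above (file names are hypothetical, and `extra.rc` is assumed to contain no further `%include`), a user configuration such as:

    # ~/.hgrc
    [ui]
    username = Jane Doe <jane@example.com>
    %include extra.rc
    [alias]
    lg = log --graph

is split into three layers: one for `ui.username`, one for the contents of `extra.rc`, and one for `alias.lg`. Later layers take precedence, so values from `extra.rc` override the `ui.username` set before the `%include`, and are themselves overridden by anything that follows it (see the `parse` method and `test_include_layer_ordering` further down this page).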
45 45
46 46 impl ConfigLayer {
47 47 pub fn new(origin: ConfigOrigin) -> Self {
48 48 ConfigLayer {
49 49 sections: HashMap::new(),
50 50 trusted: true, // TODO check
51 51 origin,
52 52 }
53 53 }
54 54
55 55 /// Parse `--config` CLI arguments and return a layer if there’s any
56 56 pub(crate) fn parse_cli_args(
57 57 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
58 58 ) -> Result<Option<Self>, ConfigError> {
59 59 fn parse_one(arg: &[u8]) -> Option<(Vec<u8>, Vec<u8>, Vec<u8>)> {
60 60 use crate::utils::SliceExt;
61 61
62 62 let (section_and_item, value) = arg.split_2(b'=')?;
63 63 let (section, item) = section_and_item.trim().split_2(b'.')?;
64 64 Some((
65 65 section.to_owned(),
66 66 item.to_owned(),
67 67 value.trim().to_owned(),
68 68 ))
69 69 }
70 70
71 71 let mut layer = Self::new(ConfigOrigin::CommandLine);
72 72 for arg in cli_config_args {
73 73 let arg = arg.as_ref();
74 74 if let Some((section, item, value)) = parse_one(arg) {
75 75 layer.add(section, item, value, None);
76 76 } else {
77 77 Err(HgError::abort(
78 78 format!(
79 79 "abort: malformed --config option: '{}' \
80 80 (use --config section.name=value)",
81 81 String::from_utf8_lossy(arg),
82 82 ),
83 83 CONFIG_PARSE_ERROR_ABORT,
84 84 None,
85 85 ))?
86 86 }
87 87 }
88 88 if layer.sections.is_empty() {
89 89 Ok(None)
90 90 } else {
91 91 Ok(Some(layer))
92 92 }
93 93 }
94 94
95 95 /// Returns whether this layer comes from `--config` CLI arguments
96 96 pub(crate) fn is_from_command_line(&self) -> bool {
97 97 matches!(self.origin, ConfigOrigin::CommandLine)
98 98 }
99 99
100 100 /// Add an entry to the config, overwriting the old one if already present.
101 101 pub fn add(
102 102 &mut self,
103 103 section: Vec<u8>,
104 104 item: Vec<u8>,
105 105 value: Vec<u8>,
106 106 line: Option<usize>,
107 107 ) {
108 108 self.sections
109 109 .entry(section)
110 .or_insert_with(HashMap::new)
110 .or_default()
111 111 .insert(item, ConfigValue { bytes: value, line });
112 112 }
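Note on the change just above: replacing `.or_insert_with(HashMap::new)` with `.or_default()` is one of the clippy suggestions this pass applies. When the entry's value type implements `Default`, `or_default()` expresses the same thing more directly. A minimal sketch of the equivalence, with illustrative types rather than the ones used in this file:

    use std::collections::HashMap;

    fn main() {
        let mut groups: HashMap<&str, Vec<u32>> = HashMap::new();
        // Same effect as `.entry("a").or_insert_with(Vec::new)`, since `Vec: Default`.
        groups.entry("a").or_default().push(1);
        groups.entry("a").or_default().push(2);
        assert_eq!(groups["a"], vec![1, 2]);
    }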
113 113
114 114 /// Returns the config value in `<section>.<item>` if it exists
115 115 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
116 116 self.sections.get(section)?.get(item)
117 117 }
118 118
119 119 /// Returns the keys defined in the given section
120 120 pub fn iter_keys(&self, section: &[u8]) -> impl Iterator<Item = &[u8]> {
121 121 self.sections
122 122 .get(section)
123 123 .into_iter()
124 124 .flat_map(|section| section.keys().map(|vec| &**vec))
125 125 }
126 126
127 127 /// Returns the (key, value) pairs defined in the given section
128 128 pub fn iter_section<'layer>(
129 129 &'layer self,
130 130 section: &[u8],
131 131 ) -> impl Iterator<Item = (&'layer [u8], &'layer [u8])> {
132 132 self.sections
133 133 .get(section)
134 134 .into_iter()
135 135 .flat_map(|section| section.iter().map(|(k, v)| (&**k, &*v.bytes)))
136 136 }
137 137
138 138 /// Returns whether any key is defined in the given section
139 139 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
140 140 self.sections
141 141 .get(section)
142 142 .map_or(false, |section| !section.is_empty())
143 143 }
144 144
145 145 pub fn is_empty(&self) -> bool {
146 146 self.sections.is_empty()
147 147 }
148 148
149 149 /// Returns a `Vec` of layers in order of precedence (so, in read order),
150 150 /// recursively parsing the `%include` directives if any.
151 151 pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
152 152 let mut layers = vec![];
153 153
154 154 // Discard byte order mark if any
155 155 let data = if data.starts_with(b"\xef\xbb\xbf") {
156 156 &data[3..]
157 157 } else {
158 158 data
159 159 };
160 160
161 161 // TODO check if it's trusted
162 162 let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
163 163
164 164 let mut lines_iter =
165 165 data.split(|b| *b == b'\n').enumerate().peekable();
166 166 let mut section = b"".to_vec();
167 167
168 168 while let Some((index, bytes)) = lines_iter.next() {
169 169 let line = Some(index + 1);
170 170 if let Some(m) = INCLUDE_RE.captures(bytes) {
171 171 let filename_bytes = &m[1];
172 172 let filename_bytes = crate::utils::expand_vars(filename_bytes);
173 173 // `Path::parent` only fails for the root directory,
174 174 // which `src` can’t be since we’ve managed to open it as a
175 175 // file.
176 176 let dir = src
177 177 .parent()
178 178 .expect("Path::parent fail on a file we’ve read");
179 179 // `Path::join` with an absolute argument correctly ignores the
180 180 // base path
181 let filename = dir.join(&get_path_from_bytes(&filename_bytes));
181 let filename = dir.join(get_path_from_bytes(&filename_bytes));
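Note on the change just above: dropping the `&` is the kind of cleanup clippy's `needless_borrow` lint suggests. `Path::join` takes `impl AsRef<Path>`, and the value returned by `get_path_from_bytes` already satisfies that bound, so taking an extra reference adds nothing. A small illustration with hypothetical paths:

    use std::path::Path;

    fn main() {
        let dir = Path::new("/tmp");
        let name: &Path = Path::new("included.rc");
        // `join` takes `impl AsRef<Path>`; `name` already satisfies it, so the
        // extra `&` in the first call is redundant.
        assert_eq!(dir.join(&name), dir.join(name));
    }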
182 182 match std::fs::read(&filename) {
183 183 Ok(data) => {
184 184 layers.push(current_layer);
185 185 layers.extend(Self::parse(&filename, &data)?);
186 186 current_layer =
187 187 Self::new(ConfigOrigin::File(src.to_owned()));
188 188 }
189 189 Err(error) => {
190 190 if error.kind() != std::io::ErrorKind::NotFound {
191 191 return Err(ConfigParseError {
192 192 origin: ConfigOrigin::File(src.to_owned()),
193 193 line,
194 194 message: format_bytes!(
195 195 b"cannot include {} ({})",
196 196 filename_bytes,
197 197 format_bytes::Utf8(error)
198 198 ),
199 199 }
200 200 .into());
201 201 }
202 202 }
203 203 }
204 204 } else if EMPTY_RE.captures(bytes).is_some() {
205 205 } else if let Some(m) = SECTION_RE.captures(bytes) {
206 206 section = m[1].to_vec();
207 207 } else if let Some(m) = ITEM_RE.captures(bytes) {
208 208 let item = m[1].to_vec();
209 209 let mut value = m[2].to_vec();
210 210 loop {
211 211 match lines_iter.peek() {
212 212 None => break,
213 213 Some((_, v)) => {
214 214 if COMMENT_RE.captures(v).is_some() {
215 215 } else if CONT_RE.captures(v).is_some() {
216 216 value.extend(b"\n");
217 217 value.extend(&m[1]);
218 218 } else {
219 219 break;
220 220 }
221 221 }
222 222 };
223 223 lines_iter.next();
224 224 }
225 225 current_layer.add(section.clone(), item, value, line);
226 226 } else if let Some(m) = UNSET_RE.captures(bytes) {
227 227 if let Some(map) = current_layer.sections.get_mut(&section) {
228 228 map.remove(&m[1]);
229 229 }
230 230 } else {
231 231 let message = if bytes.starts_with(b" ") {
232 232 format_bytes!(b"unexpected leading whitespace: {}", bytes)
233 233 } else {
234 234 bytes.to_owned()
235 235 };
236 236 return Err(ConfigParseError {
237 237 origin: ConfigOrigin::File(src.to_owned()),
238 238 line,
239 239 message,
240 240 }
241 241 .into());
242 242 }
243 243 }
244 244 if !current_layer.is_empty() {
245 245 layers.push(current_layer);
246 246 }
247 247 Ok(layers)
248 248 }
249 249 }
250 250
251 251 impl DisplayBytes for ConfigLayer {
252 252 fn display_bytes(
253 253 &self,
254 254 out: &mut dyn std::io::Write,
255 255 ) -> std::io::Result<()> {
256 256 let mut sections: Vec<_> = self.sections.iter().collect();
257 257 sections.sort_by(|e0, e1| e0.0.cmp(e1.0));
258 258
259 259 for (section, items) in sections.into_iter() {
260 260 let mut items: Vec<_> = items.iter().collect();
261 261 items.sort_by(|e0, e1| e0.0.cmp(e1.0));
262 262
263 263 for (item, config_entry) in items {
264 264 write_bytes!(
265 265 out,
266 266 b"{}.{}={} # {}\n",
267 267 section,
268 268 item,
269 269 &config_entry.bytes,
270 270 &self.origin,
271 271 )?
272 272 }
273 273 }
274 274 Ok(())
275 275 }
276 276 }
277 277
278 278 /// Mapping of section item to value.
279 279 /// In the following:
280 280 /// ```text
281 281 /// [ui]
282 282 /// paginate=no
283 283 /// ```
284 284 /// "paginate" is the section item and "no" the value.
285 285 pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
286 286
287 287 #[derive(Clone, Debug, PartialEq)]
288 288 pub struct ConfigValue {
289 289 /// The raw bytes of the value (be it from the CLI, env or from a file)
290 290 pub bytes: Vec<u8>,
291 291 /// Only present if the value comes from a file, 1-indexed.
292 292 pub line: Option<usize>,
293 293 }
294 294
295 295 #[derive(Clone, Debug, PartialEq, Eq)]
296 296 pub enum ConfigOrigin {
297 297 /// From a configuration file
298 298 File(PathBuf),
299 299 /// From [ui.tweakdefaults]
300 300 Tweakdefaults,
301 301 /// From a `--config` CLI argument
302 302 CommandLine,
303 303 /// From a `--color` CLI argument
304 304 CommandLineColor,
305 305 /// From environment variables like `$PAGER` or `$EDITOR`
306 306 Environment(Vec<u8>),
307 307 /// From configitems.toml
308 308 Defaults,
309 309 /* TODO extensions
310 310 * TODO Python resources?
311 311 * Others? */
312 312 }
313 313
314 314 impl DisplayBytes for ConfigOrigin {
315 315 fn display_bytes(
316 316 &self,
317 317 out: &mut dyn std::io::Write,
318 318 ) -> std::io::Result<()> {
319 319 match self {
320 320 ConfigOrigin::File(p) => out.write_all(&get_bytes_from_path(p)),
321 321 ConfigOrigin::CommandLine => out.write_all(b"--config"),
322 322 ConfigOrigin::CommandLineColor => out.write_all(b"--color"),
323 323 ConfigOrigin::Environment(e) => write_bytes!(out, b"${}", e),
324 324 ConfigOrigin::Tweakdefaults => {
325 325 write_bytes!(out, b"ui.tweakdefaults")
326 326 }
327 327 ConfigOrigin::Defaults => {
328 328 write_bytes!(out, b"configitems.toml")
329 329 }
330 330 }
331 331 }
332 332 }
333 333
334 334 #[derive(Debug)]
335 335 pub struct ConfigParseError {
336 336 pub origin: ConfigOrigin,
337 337 pub line: Option<usize>,
338 338 pub message: Vec<u8>,
339 339 }
340 340
341 341 #[derive(Debug, derive_more::From)]
342 342 pub enum ConfigError {
343 343 Parse(ConfigParseError),
344 344 Other(HgError),
345 345 }
346 346
347 347 fn make_regex(pattern: &'static str) -> Regex {
348 348 Regex::new(pattern).expect("expected a valid regex")
349 349 }
@@ -1,810 +1,810 b''
1 1 // config.rs
2 2 //
3 3 // Copyright 2020
4 4 // Valentin Gatien-Baron,
5 5 // Raphaël Gomès <rgomes@octobus.net>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 //! Mercurial config parsing and interfaces.
11 11
12 12 pub mod config_items;
13 13 mod layer;
14 14 mod plain_info;
15 15 mod values;
16 16 pub use layer::{ConfigError, ConfigOrigin, ConfigParseError};
17 17 use lazy_static::lazy_static;
18 18 pub use plain_info::PlainInfo;
19 19
20 20 use self::config_items::DefaultConfig;
21 21 use self::config_items::DefaultConfigItem;
22 22 use self::layer::ConfigLayer;
23 23 use self::layer::ConfigValue;
24 24 use crate::errors::HgError;
25 25 use crate::errors::{HgResultExt, IoResultExt};
26 26 use crate::utils::files::get_bytes_from_os_str;
27 27 use format_bytes::{write_bytes, DisplayBytes};
28 28 use std::collections::HashSet;
29 29 use std::env;
30 30 use std::fmt;
31 31 use std::path::{Path, PathBuf};
32 32 use std::str;
33 33
34 34 lazy_static! {
35 35 static ref DEFAULT_CONFIG: Result<DefaultConfig, HgError> = {
36 36 DefaultConfig::from_contents(include_str!(
37 37 "../../../../mercurial/configitems.toml"
38 38 ))
39 39 };
40 40 }
41 41
42 42 /// Holds the config values for the current repository
43 43 /// TODO update this docstring once we support more sources
44 44 #[derive(Clone)]
45 45 pub struct Config {
46 46 layers: Vec<layer::ConfigLayer>,
47 47 plain: PlainInfo,
48 48 }
49 49
50 50 impl DisplayBytes for Config {
51 51 fn display_bytes(
52 52 &self,
53 53 out: &mut dyn std::io::Write,
54 54 ) -> std::io::Result<()> {
55 55 for (index, layer) in self.layers.iter().rev().enumerate() {
56 56 write_bytes!(
57 57 out,
58 58 b"==== Layer {} (trusted: {}) ====\n{}",
59 59 index,
60 60 if layer.trusted {
61 61 &b"yes"[..]
62 62 } else {
63 63 &b"no"[..]
64 64 },
65 65 layer
66 66 )?;
67 67 }
68 68 Ok(())
69 69 }
70 70 }
71 71
72 72 pub enum ConfigSource {
73 73 /// Absolute path to a config file
74 74 AbsPath(PathBuf),
75 75 /// Already parsed (from the CLI, env, Python resources, etc.)
76 76 Parsed(layer::ConfigLayer),
77 77 }
78 78
79 79 #[derive(Debug)]
80 80 pub struct ConfigValueParseErrorDetails {
81 81 pub origin: ConfigOrigin,
82 82 pub line: Option<usize>,
83 83 pub section: Vec<u8>,
84 84 pub item: Vec<u8>,
85 85 pub value: Vec<u8>,
86 86 pub expected_type: &'static str,
87 87 }
88 88
89 89 // boxed to avoid very large Result types
90 90 pub type ConfigValueParseError = Box<ConfigValueParseErrorDetails>;
91 91
92 92 impl fmt::Display for ConfigValueParseError {
93 93 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
94 94 // TODO: add origin and line number information, here and in
95 95 // corresponding python code
96 96 write!(
97 97 f,
98 98 "config error: {}.{} is not a {} ('{}')",
99 99 String::from_utf8_lossy(&self.section),
100 100 String::from_utf8_lossy(&self.item),
101 101 self.expected_type,
102 102 String::from_utf8_lossy(&self.value)
103 103 )
104 104 }
105 105 }
106 106
107 107 /// Returns true if the config item is disabled by PLAIN or PLAINEXCEPT
108 108 fn should_ignore(plain: &PlainInfo, section: &[u8], item: &[u8]) -> bool {
109 109 // duplication with [_applyconfig] in [ui.py],
110 110 if !plain.is_plain() {
111 111 return false;
112 112 }
113 113 if section == b"alias" {
114 114 return plain.plainalias();
115 115 }
116 116 if section == b"revsetalias" {
117 117 return plain.plainrevsetalias();
118 118 }
119 119 if section == b"templatealias" {
120 120 return plain.plaintemplatealias();
121 121 }
122 122 if section == b"ui" {
123 123 let to_delete: &[&[u8]] = &[
124 124 b"debug",
125 125 b"fallbackencoding",
126 126 b"quiet",
127 127 b"slash",
128 128 b"logtemplate",
129 129 b"message-output",
130 130 b"statuscopies",
131 131 b"style",
132 132 b"traceback",
133 133 b"verbose",
134 134 ];
135 135 return to_delete.contains(&item);
136 136 }
137 137 let sections_to_delete: &[&[u8]] =
138 138 &[b"defaults", b"commands", b"command-templates"];
139 139 sections_to_delete.contains(&section)
140 140 }
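For context, `PlainInfo` reflects Mercurial's `HGPLAIN` / `HGPLAINEXCEPT` environment variables. The invocations below are illustrative, not taken from this repository's tests:

    HGPLAIN=1 hg log              # aliases, custom templates, verbose, etc. are ignored
    HGPLAINEXCEPT=alias hg log    # plain mode, but [alias] definitions still apply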
141 141
142 142 impl Config {
143 143 /// The configuration to use when printing configuration-loading errors
144 144 pub fn empty() -> Self {
145 145 Self {
146 146 layers: Vec::new(),
147 147 plain: PlainInfo::empty(),
148 148 }
149 149 }
150 150
151 151 /// Load system and user configuration from various files.
152 152 ///
153 153 /// This is also affected by some environment variables.
154 154 pub fn load_non_repo() -> Result<Self, ConfigError> {
155 155 let mut config = Self::empty();
156 156 let opt_rc_path = env::var_os("HGRCPATH");
157 157 // HGRCPATH replaces system config
158 158 if opt_rc_path.is_none() {
159 159 config.add_system_config()?
160 160 }
161 161
162 162 config.add_for_environment_variable("EDITOR", b"ui", b"editor");
163 163 config.add_for_environment_variable("VISUAL", b"ui", b"editor");
164 164 config.add_for_environment_variable("PAGER", b"pager", b"pager");
165 165
166 166 // These are set by `run-tests.py --rhg` to enable fallback for the
167 167 // entire test suite. Alternatives would be setting configuration
168 168 // through `$HGRCPATH` but some tests override that, or changing the
169 169 // `hg` shell alias to include `--config` but that disrupts tests that
170 170 // print command lines and check expected output.
171 171 config.add_for_environment_variable(
172 172 "RHG_ON_UNSUPPORTED",
173 173 b"rhg",
174 174 b"on-unsupported",
175 175 );
176 176 config.add_for_environment_variable(
177 177 "RHG_FALLBACK_EXECUTABLE",
178 178 b"rhg",
179 179 b"fallback-executable",
180 180 );
181 181
182 182 // HGRCPATH replaces user config
183 183 if opt_rc_path.is_none() {
184 184 config.add_user_config()?
185 185 }
186 186 if let Some(rc_path) = &opt_rc_path {
187 187 for path in env::split_paths(rc_path) {
188 188 if !path.as_os_str().is_empty() {
189 189 if path.is_dir() {
190 190 config.add_trusted_dir(&path)?
191 191 } else {
192 192 config.add_trusted_file(&path)?
193 193 }
194 194 }
195 195 }
196 196 }
197 197 Ok(config)
198 198 }
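As implemented above, setting `HGRCPATH` replaces both the system-wide and per-user configuration; each entry in the path is either a file, or a directory whose `*.rc` files are read in sorted order. An illustrative invocation (paths are hypothetical):

    HGRCPATH=/etc/mercurial/site.rc:$HOME/hg-extra.d hg status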
199 199
200 200 pub fn load_cli_args(
201 201 &mut self,
202 202 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
203 203 color_arg: Option<Vec<u8>>,
204 204 ) -> Result<(), ConfigError> {
205 205 if let Some(layer) = ConfigLayer::parse_cli_args(cli_config_args)? {
206 206 self.layers.push(layer)
207 207 }
208 208 if let Some(arg) = color_arg {
209 209 let mut layer = ConfigLayer::new(ConfigOrigin::CommandLineColor);
210 210 layer.add(b"ui"[..].into(), b"color"[..].into(), arg, None);
211 211 self.layers.push(layer)
212 212 }
213 213 Ok(())
214 214 }
215 215
216 216 fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> {
217 217 if let Some(entries) = std::fs::read_dir(path)
218 218 .when_reading_file(path)
219 219 .io_not_found_as_none()?
220 220 {
221 221 let mut file_paths = entries
222 222 .map(|result| {
223 223 result.when_reading_file(path).map(|entry| entry.path())
224 224 })
225 225 .collect::<Result<Vec<_>, _>>()?;
226 226 file_paths.sort();
227 227 for file_path in &file_paths {
228 228 if file_path.extension() == Some(std::ffi::OsStr::new("rc")) {
229 229 self.add_trusted_file(file_path)?
230 230 }
231 231 }
232 232 }
233 233 Ok(())
234 234 }
235 235
236 236 fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> {
237 237 if let Some(data) = std::fs::read(path)
238 238 .when_reading_file(path)
239 239 .io_not_found_as_none()?
240 240 {
241 241 self.layers.extend(ConfigLayer::parse(path, &data)?)
242 242 }
243 243 Ok(())
244 244 }
245 245
246 246 fn add_for_environment_variable(
247 247 &mut self,
248 248 var: &str,
249 249 section: &[u8],
250 250 key: &[u8],
251 251 ) {
252 252 if let Some(value) = env::var_os(var) {
253 253 let origin = layer::ConfigOrigin::Environment(var.into());
254 254 let mut layer = ConfigLayer::new(origin);
255 255 layer.add(
256 256 section.to_owned(),
257 257 key.to_owned(),
258 258 get_bytes_from_os_str(value),
259 259 None,
260 260 );
261 261 self.layers.push(layer)
262 262 }
263 263 }
264 264
265 265 #[cfg(unix)] // TODO: other platforms
266 266 fn add_system_config(&mut self) -> Result<(), ConfigError> {
267 267 let mut add_for_prefix = |prefix: &Path| -> Result<(), ConfigError> {
268 268 let etc = prefix.join("etc").join("mercurial");
269 269 self.add_trusted_file(&etc.join("hgrc"))?;
270 270 self.add_trusted_dir(&etc.join("hgrc.d"))
271 271 };
272 272 let root = Path::new("/");
273 273 // TODO: use `std::env::args_os().next().unwrap()` a.k.a. argv[0]
274 274 // instead? TODO: can this be a relative path?
275 275 let hg = crate::utils::current_exe()?;
276 276 // TODO: this order (per-installation then per-system) matches
277 277 // `systemrcpath()` in `mercurial/scmposix.py`, but
278 278 // `mercurial/helptext/config.txt` suggests it should be reversed
279 279 if let Some(installation_prefix) = hg.parent().and_then(Path::parent) {
280 280 if installation_prefix != root {
281 281 add_for_prefix(installation_prefix)?
282 282 }
283 283 }
284 284 add_for_prefix(root)?;
285 285 Ok(())
286 286 }
287 287
288 288 #[cfg(unix)] // TODO: other platforms
289 289 fn add_user_config(&mut self) -> Result<(), ConfigError> {
290 290 let opt_home = home::home_dir();
291 291 if let Some(home) = &opt_home {
292 292 self.add_trusted_file(&home.join(".hgrc"))?
293 293 }
294 294 let darwin = cfg!(any(target_os = "macos", target_os = "ios"));
295 295 if !darwin {
296 296 if let Some(config_home) = env::var_os("XDG_CONFIG_HOME")
297 297 .map(PathBuf::from)
298 298 .or_else(|| opt_home.map(|home| home.join(".config")))
299 299 {
300 300 self.add_trusted_file(&config_home.join("hg").join("hgrc"))?
301 301 }
302 302 }
303 303 Ok(())
304 304 }
305 305
306 306 /// Loads in order, which means that the precedence is the same
307 307 /// as the order of `sources`.
308 308 pub fn load_from_explicit_sources(
309 309 sources: Vec<ConfigSource>,
310 310 ) -> Result<Self, ConfigError> {
311 311 let mut layers = vec![];
312 312
313 313 for source in sources.into_iter() {
314 314 match source {
315 315 ConfigSource::Parsed(c) => layers.push(c),
316 316 ConfigSource::AbsPath(c) => {
317 317 // TODO check if it should be trusted
318 318 // mercurial/ui.py:427
319 319 let data = match std::fs::read(&c) {
320 320 Err(_) => continue, // same as the python code
321 321 Ok(data) => data,
322 322 };
323 323 layers.extend(ConfigLayer::parse(&c, &data)?)
324 324 }
325 325 }
326 326 }
327 327
328 328 Ok(Config {
329 329 layers,
330 330 plain: PlainInfo::empty(),
331 331 })
332 332 }
333 333
334 334 /// Loads the per-repository config into a new `Config` which is combined
335 335 /// with `self`.
336 336 pub(crate) fn combine_with_repo(
337 337 &self,
338 338 repo_config_files: &[PathBuf],
339 339 ) -> Result<Self, ConfigError> {
340 340 let (cli_layers, other_layers) = self
341 341 .layers
342 342 .iter()
343 343 .cloned()
344 344 .partition(ConfigLayer::is_from_command_line);
345 345
346 346 let mut repo_config = Self {
347 347 layers: other_layers,
348 348 plain: PlainInfo::empty(),
349 349 };
350 350 for path in repo_config_files {
351 351 // TODO: check if this file should be trusted:
352 352 // `mercurial/ui.py:427`
353 353 repo_config.add_trusted_file(path)?;
354 354 }
355 355 repo_config.layers.extend(cli_layers);
356 356 Ok(repo_config)
357 357 }
358 358
359 359 pub fn apply_plain(&mut self, plain: PlainInfo) {
360 360 self.plain = plain;
361 361 }
362 362
363 363 /// Returns the default value for the given config item, if any.
364 364 pub fn get_default(
365 365 &self,
366 366 section: &[u8],
367 367 item: &[u8],
368 368 ) -> Result<Option<&DefaultConfigItem>, HgError> {
369 369 let default_config = DEFAULT_CONFIG.as_ref().map_err(|e| {
370 370 HgError::abort(
371 371 e.to_string(),
372 372 crate::exit_codes::ABORT,
373 373 Some("`mercurial/configitems.toml` is not valid".into()),
374 374 )
375 375 })?;
376 376 let default_opt = default_config.get(section, item);
377 377 Ok(default_opt.filter(|default| {
378 378 default
379 379 .in_core_extension()
380 380 .map(|extension| {
381 381 // Only return the default for an in-core extension item
382 382 // if said extension is enabled
383 383 self.is_extension_enabled(extension.as_bytes())
384 384 })
385 385 .unwrap_or(true)
386 386 }))
387 387 }
388 388
389 389 /// Return the config item that corresponds to a section + item, a function
390 390 /// to parse from the raw bytes to the expected type (which is passed as
391 391 /// a string only to make debugging easier).
392 392 /// Used by higher-level methods like `get_bool`.
393 393 ///
394 394 /// `fallback_to_default` controls whether the default value (if any) is
395 395 /// returned if nothing is found.
396 396 fn get_parse<'config, T: 'config>(
397 397 &'config self,
398 398 section: &[u8],
399 399 item: &[u8],
400 400 expected_type: &'static str,
401 401 parse: impl Fn(&'config [u8]) -> Option<T>,
402 402 fallback_to_default: bool,
403 403 ) -> Result<Option<T>, HgError>
404 404 where
405 405 Option<T>: TryFrom<&'config DefaultConfigItem, Error = HgError>,
406 406 {
407 407 match self.get_inner(section, item) {
408 408 Some((layer, v)) => match parse(&v.bytes) {
409 409 Some(b) => Ok(Some(b)),
410 410 None => Err(Box::new(ConfigValueParseErrorDetails {
411 411 origin: layer.origin.to_owned(),
412 412 line: v.line,
413 413 value: v.bytes.to_owned(),
414 414 section: section.to_owned(),
415 415 item: item.to_owned(),
416 416 expected_type,
417 417 })
418 418 .into()),
419 419 },
420 420 None => {
421 421 if !fallback_to_default {
422 422 return Ok(None);
423 423 }
424 424 match self.get_default(section, item)? {
425 425 Some(default) => {
426 426 // Defaults are TOML values, so they're not in the same
427 427 // shape as in the config files.
428 428 // First try to convert directly to the expected type
429 429 let as_t = default.try_into();
430 430 match as_t {
431 431 Ok(t) => Ok(t),
432 432 Err(e) => {
433 433 // If it fails, it means that...
434 434 let as_bytes: Result<Option<&[u8]>, _> =
435 435 default.try_into();
436 436 match as_bytes {
437 437 Ok(bytes_opt) => {
438 438 if let Some(bytes) = bytes_opt {
439 439 // ...we should be able to parse it
440 440 return Ok(parse(bytes));
441 441 }
442 442 Err(e)
443 443 }
444 444 Err(_) => Err(e),
445 445 }
446 446 }
447 447 }
448 448 }
449 449 None => {
450 450 self.print_devel_warning(section, item)?;
451 451 Ok(None)
452 452 }
453 453 }
454 454 }
455 455 }
456 456 }
457 457
458 458 fn print_devel_warning(
459 459 &self,
460 460 section: &[u8],
461 461 item: &[u8],
462 462 ) -> Result<(), HgError> {
463 463 let warn_all = self.get_bool(b"devel", b"all-warnings")?;
464 464 let warn_specific = self.get_bool(b"devel", b"warn-config-unknown")?;
465 465 if !warn_all || !warn_specific {
466 466 // We technically shouldn't print anything here since it's not
467 467 // the concern of `hg-core`.
468 468 //
469 469 // We're printing directly to stderr since development warnings
470 470 // are not on by default and surfacing this to consumer crates
471 471 // (like `rhg`) would be more difficult, probably requiring
472 472 // something à la `log` crate.
473 473 //
474 474 // TODO maybe figure out a way of exposing a "warnings" channel
475 475 // that consumer crates can hook into. It would be useful for
476 476 // all other warnings that `hg-core` could expose.
477 477 eprintln!(
478 478 "devel-warn: accessing unregistered config item: '{}.{}'",
479 479 String::from_utf8_lossy(section),
480 480 String::from_utf8_lossy(item),
481 481 );
482 482 }
483 483 Ok(())
484 484 }
485 485
486 486 /// Returns an `Err` if the first value found is not a valid UTF-8 string.
487 487 /// Otherwise, returns an `Ok(value)` if found, or `None`.
488 488 pub fn get_str(
489 489 &self,
490 490 section: &[u8],
491 491 item: &[u8],
492 492 ) -> Result<Option<&str>, HgError> {
493 493 self.get_parse(
494 494 section,
495 495 item,
496 496 "ASCII or UTF-8 string",
497 497 |value| str::from_utf8(value).ok(),
498 498 true,
499 499 )
500 500 }
501 501
502 502 /// Same as `get_str`, but doesn't fall back to the default `configitem`
503 503 /// if not defined in the user config.
504 504 pub fn get_str_no_default(
505 505 &self,
506 506 section: &[u8],
507 507 item: &[u8],
508 508 ) -> Result<Option<&str>, HgError> {
509 509 self.get_parse(
510 510 section,
511 511 item,
512 512 "ASCII or UTF-8 string",
513 513 |value| str::from_utf8(value).ok(),
514 514 false,
515 515 )
516 516 }
517 517
518 518 /// Returns an `Err` if the first value found is not a valid unsigned
519 519 /// integer. Otherwise, returns an `Ok(value)` if found, or `None`.
520 520 pub fn get_u32(
521 521 &self,
522 522 section: &[u8],
523 523 item: &[u8],
524 524 ) -> Result<Option<u32>, HgError> {
525 525 self.get_parse(
526 526 section,
527 527 item,
528 528 "valid integer",
529 529 |value| str::from_utf8(value).ok()?.parse().ok(),
530 530 true,
531 531 )
532 532 }
533 533
534 534 /// Returns an `Err` if the first value found is not a valid file size
535 535 /// value such as `30` (default unit is bytes), `7 MB`, or `42.5 kb`.
536 536 /// Otherwise, returns an `Ok(value_in_bytes)` if found, or `None`.
537 537 pub fn get_byte_size(
538 538 &self,
539 539 section: &[u8],
540 540 item: &[u8],
541 541 ) -> Result<Option<u64>, HgError> {
542 542 self.get_parse(
543 543 section,
544 544 item,
545 545 "byte quantity",
546 546 values::parse_byte_size,
547 547 true,
548 548 )
549 549 }
550 550
551 551 /// Returns an `Err` if the first value found is not a valid boolean.
552 552 /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if
553 553 /// found, or `None`.
554 554 pub fn get_option(
555 555 &self,
556 556 section: &[u8],
557 557 item: &[u8],
558 558 ) -> Result<Option<bool>, HgError> {
559 559 self.get_parse(section, item, "boolean", values::parse_bool, true)
560 560 }
561 561
562 562 /// Same as `get_option`, but doesn't fall back to the default `configitem`
563 563 /// if not defined in the user config.
564 564 pub fn get_option_no_default(
565 565 &self,
566 566 section: &[u8],
567 567 item: &[u8],
568 568 ) -> Result<Option<bool>, HgError> {
569 569 self.get_parse(section, item, "boolean", values::parse_bool, false)
570 570 }
571 571
572 572 /// Returns the corresponding boolean in the config. Returns `Ok(false)`
573 573 /// if the value is not found, an `Err` if it's not a valid boolean.
574 574 pub fn get_bool(
575 575 &self,
576 576 section: &[u8],
577 577 item: &[u8],
578 578 ) -> Result<bool, HgError> {
579 579 Ok(self.get_option(section, item)?.unwrap_or(false))
580 580 }
581 581
582 582 /// Same as `get_bool`, but doesn't fall back to the default `configitem`
583 583 /// if not defined in the user config.
584 584 pub fn get_bool_no_default(
585 585 &self,
586 586 section: &[u8],
587 587 item: &[u8],
588 588 ) -> Result<bool, HgError> {
589 589 Ok(self.get_option_no_default(section, item)?.unwrap_or(false))
590 590 }
591 591
592 592 /// Returns `true` if the extension is enabled, `false` otherwise
593 593 pub fn is_extension_enabled(&self, extension: &[u8]) -> bool {
594 594 let value = self.get(b"extensions", extension);
595 595 match value {
596 596 Some(c) => !c.starts_with(b"!"),
597 597 None => false,
598 598 }
599 599 }
600 600
601 601 /// If there is an `item` value in `section`, parse and return a list of
602 602 /// byte strings.
603 603 pub fn get_list(
604 604 &self,
605 605 section: &[u8],
606 606 item: &[u8],
607 607 ) -> Option<Vec<Vec<u8>>> {
608 608 self.get(section, item).map(values::parse_list)
609 609 }
610 610
611 611 /// Returns the raw value bytes of the first one found, or `None`.
612 612 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
613 613 self.get_inner(section, item)
614 614 .map(|(_, value)| value.bytes.as_ref())
615 615 }
616 616
617 617 /// Returns the raw value bytes of the first one found, or `None`.
618 618 pub fn get_with_origin(
619 619 &self,
620 620 section: &[u8],
621 621 item: &[u8],
622 622 ) -> Option<(&[u8], &ConfigOrigin)> {
623 623 self.get_inner(section, item)
624 624 .map(|(layer, value)| (value.bytes.as_ref(), &layer.origin))
625 625 }
626 626
627 627 /// Returns the layer and the value of the first one found, or `None`.
628 628 fn get_inner(
629 629 &self,
630 630 section: &[u8],
631 631 item: &[u8],
632 632 ) -> Option<(&ConfigLayer, &ConfigValue)> {
633 633 // Filter out the config items that are hidden by [PLAIN].
634 634 // This differs from python hg where we delete them from the config.
635 635 let should_ignore = should_ignore(&self.plain, section, item);
636 636 for layer in self.layers.iter().rev() {
637 637 if !layer.trusted {
638 638 continue;
639 639 }
640 640 // The [PLAIN] config should not affect the defaults.
641 641 //
642 642 // However, PLAIN should also affect the "tweaked" defaults (unless
643 643 // "tweakdefault" is part of "HGPLAINEXCEPT").
644 644 //
645 645 // In practice the tweak-default layer is only added when it is
646 646 // relevant, so we can safely always take it into
647 647 // account here.
648 648 if should_ignore && !(layer.origin == ConfigOrigin::Tweakdefaults)
649 649 {
650 650 continue;
651 651 }
652 652 if let Some(v) = layer.get(section, item) {
653 653 return Some((layer, v));
654 654 }
655 655 }
656 656 None
657 657 }
658 658
659 659 /// Return all keys defined for the given section
660 660 pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> {
661 661 self.layers
662 662 .iter()
663 663 .flat_map(|layer| layer.iter_keys(section))
664 664 .collect()
665 665 }
666 666
667 667 /// Returns whether any key is defined in the given section
668 668 pub fn has_non_empty_section(&self, section: &[u8]) -> bool {
669 669 self.layers
670 670 .iter()
671 671 .any(|layer| layer.has_non_empty_section(section))
672 672 }
673 673
674 674 /// Yields (key, value) pairs for everything in the given section
675 675 pub fn iter_section<'a>(
676 676 &'a self,
677 677 section: &'a [u8],
678 678 ) -> impl Iterator<Item = (&[u8], &[u8])> + 'a {
679 679 // Deduplicate keys redefined in multiple layers
680 680 let mut keys_already_seen = HashSet::new();
681 681 let mut key_is_new =
682 682 move |&(key, _value): &(&'a [u8], &'a [u8])| -> bool {
683 683 keys_already_seen.insert(key)
684 684 };
685 685 // This is similar to `flat_map` + `filter_map`, except with a single
686 686 // closure that owns `key_is_new` (and therefore the
687 687 // `keys_already_seen` set):
688 688 let mut layer_iters = self
689 689 .layers
690 690 .iter()
691 691 .rev()
692 692 .map(move |layer| layer.iter_section(section))
693 693 .peekable();
694 694 std::iter::from_fn(move || loop {
695 695 if let Some(pair) = layer_iters.peek_mut()?.find(&mut key_is_new) {
696 696 return Some(pair);
697 697 } else {
698 698 layer_iters.next();
699 699 }
700 700 })
701 701 }
702 702
703 703 /// Get raw values bytes from all layers (even untrusted ones) in order
704 704 /// of precedence.
705 705 #[cfg(test)]
706 706 fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
707 707 let mut res = vec![];
708 708 for layer in self.layers.iter().rev() {
709 709 if let Some(v) = layer.get(section, item) {
710 710 res.push(v.bytes.as_ref());
711 711 }
712 712 }
713 713 res
714 714 }
715 715
716 716 // a config layer that's introduced by ui.tweakdefaults
717 717 fn tweakdefaults_layer() -> ConfigLayer {
718 718 let mut layer = ConfigLayer::new(ConfigOrigin::Tweakdefaults);
719 719
720 720 let mut add = |section: &[u8], item: &[u8], value: &[u8]| {
721 721 layer.add(
722 722 section[..].into(),
723 723 item[..].into(),
724 724 value[..].into(),
725 725 None,
726 726 );
727 727 };
728 728 // duplication of [tweakrc] from [ui.py]
729 729 add(b"ui", b"rollback", b"False");
730 730 add(b"ui", b"statuscopies", b"yes");
731 731 add(b"ui", b"interface", b"curses");
732 732 add(b"ui", b"relative-paths", b"yes");
733 733 add(b"commands", b"grep.all-files", b"True");
734 734 add(b"commands", b"update.check", b"noconflict");
735 735 add(b"commands", b"status.verbose", b"True");
736 736 add(b"commands", b"resolve.explicit-re-merge", b"True");
737 737 add(b"git", b"git", b"1");
738 738 add(b"git", b"showfunc", b"1");
739 739 add(b"git", b"word-diff", b"1");
740 740 layer
741 741 }
742 742
743 743 // introduce the tweaked defaults as implied by ui.tweakdefaults
744 744 pub fn tweakdefaults(&mut self) {
745 745 self.layers.insert(0, Config::tweakdefaults_layer());
746 746 }
747 747 }
748 748
749 749 #[cfg(test)]
750 750 mod tests {
751 751 use super::*;
752 752 use pretty_assertions::assert_eq;
753 753 use std::fs::File;
754 754 use std::io::Write;
755 755
756 756 #[test]
757 757 fn test_include_layer_ordering() {
758 758 let tmpdir = tempfile::tempdir().unwrap();
759 759 let tmpdir_path = tmpdir.path();
760 760 let mut included_file =
761 File::create(&tmpdir_path.join("included.rc")).unwrap();
761 File::create(tmpdir_path.join("included.rc")).unwrap();
762 762
763 763 included_file.write_all(b"[section]\nitem=value1").unwrap();
764 764 let base_config_path = tmpdir_path.join("base.rc");
765 765 let mut config_file = File::create(&base_config_path).unwrap();
766 766 let data =
767 767 b"[section]\nitem=value0\n%include included.rc\nitem=value2\n\
768 768 [section2]\ncount = 4\nsize = 1.5 KB\nnot-count = 1.5\nnot-size = 1 ub";
769 769 config_file.write_all(data).unwrap();
770 770
771 771 let sources = vec![ConfigSource::AbsPath(base_config_path)];
772 772 let config = Config::load_from_explicit_sources(sources)
773 773 .expect("expected valid config");
774 774
775 775 let (_, value) = config.get_inner(b"section", b"item").unwrap();
776 776 assert_eq!(
777 777 value,
778 778 &ConfigValue {
779 779 bytes: b"value2".to_vec(),
780 780 line: Some(4)
781 781 }
782 782 );
783 783
784 784 let value = config.get(b"section", b"item").unwrap();
785 785 assert_eq!(value, b"value2",);
786 786 assert_eq!(
787 787 config.get_all(b"section", b"item"),
788 788 [b"value2", b"value1", b"value0"]
789 789 );
790 790
791 791 assert_eq!(config.get_u32(b"section2", b"count").unwrap(), Some(4));
792 792 assert_eq!(
793 793 config.get_byte_size(b"section2", b"size").unwrap(),
794 794 Some(1024 + 512)
795 795 );
796 796 assert!(config.get_u32(b"section2", b"not-count").is_err());
797 797 assert!(config.get_byte_size(b"section2", b"not-size").is_err());
798 798 }
799 799
800 800 #[test]
801 801 fn test_default_parse() {
802 802 let config = Config::load_from_explicit_sources(vec![])
803 803 .expect("expected valid config");
804 804 let ret = config.get_byte_size(b"cmdserver", b"max-log-size");
805 805 assert!(ret.is_ok(), "{:?}", ret);
806 806
807 807 let ret = config.get_byte_size(b"ui", b"formatted");
808 808 assert!(ret.unwrap().is_none());
809 809 }
810 810 }
@@ -1,286 +1,285 b''
1 1 // dagops.rs
2 2 //
3 3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Miscellaneous DAG operations
9 9 //!
10 10 //! # Terminology
11 11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
12 12 //! mean those revisions that have no children among the collection.
13 13 /// - Similarly, by *relative roots* of a collection of `Revision`, we mean those
14 14 //! whose parents, if any, don't belong to the collection.
15 15 use super::{Graph, GraphError, Revision, NULL_REVISION};
16 16 use crate::ancestors::AncestorsIterator;
17 17 use std::collections::{BTreeSet, HashSet};
18 18
19 19 fn remove_parents<S: std::hash::BuildHasher>(
20 20 graph: &impl Graph,
21 21 rev: Revision,
22 22 set: &mut HashSet<Revision, S>,
23 23 ) -> Result<(), GraphError> {
24 24 for parent in graph.parents(rev)?.iter() {
25 25 if *parent != NULL_REVISION {
26 26 set.remove(parent);
27 27 }
28 28 }
29 29 Ok(())
30 30 }
31 31
32 32 /// Relative heads out of some revisions, passed as an iterator.
33 33 ///
34 34 /// These heads are defined as those revisions that have no children
35 35 /// among those emitted by the iterator.
36 36 ///
37 37 /// # Performance notes
38 38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
39 39 ///
40 40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
41 41 /// guarantee that cloning the iterator doesn't result in cloning the full
42 42 /// construct it comes from.
43 43 pub fn heads<'a>(
44 44 graph: &impl Graph,
45 45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
46 46 ) -> Result<HashSet<Revision>, GraphError> {
47 47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
48 48 heads.remove(&NULL_REVISION);
49 49 for rev in iter_revs {
50 50 if *rev != NULL_REVISION {
51 51 remove_parents(graph, *rev, &mut heads)?;
52 52 }
53 53 }
54 54 Ok(heads)
55 55 }
56 56
57 57 /// Retain in `revs` only its relative heads.
58 58 ///
59 59 /// This is an in-place operation, so that control of the incoming
60 60 /// set is left to the caller.
61 61 /// - a direct Python binding would probably need to build its own `HashSet`
62 62 /// from an incoming iterable, even if its sole purpose is to extract the
63 63 /// heads.
64 64 /// - a Rust caller can decide whether cloning beforehand is appropriate
65 65 ///
66 66 /// # Performance notes
67 67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
68 68 pub fn retain_heads<S: std::hash::BuildHasher>(
69 69 graph: &impl Graph,
70 70 revs: &mut HashSet<Revision, S>,
71 71 ) -> Result<(), GraphError> {
72 72 revs.remove(&NULL_REVISION);
73 73 // we need to construct an iterable copy of revs to avoid iterating while
74 74 // mutating
75 75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
76 76 for rev in as_vec {
77 77 if rev != NULL_REVISION {
78 78 remove_parents(graph, rev, revs)?;
79 79 }
80 80 }
81 81 Ok(())
82 82 }
83 83
84 84 /// Roots of `revs`, passed as a `HashSet`
85 85 ///
86 86 /// They are returned in arbitrary order
87 87 pub fn roots<G: Graph, S: std::hash::BuildHasher>(
88 88 graph: &G,
89 89 revs: &HashSet<Revision, S>,
90 90 ) -> Result<Vec<Revision>, GraphError> {
91 91 let mut roots: Vec<Revision> = Vec::new();
92 92 for rev in revs {
93 93 if graph
94 94 .parents(*rev)?
95 95 .iter()
96 96 .filter(|p| **p != NULL_REVISION)
97 97 .all(|p| !revs.contains(p))
98 98 {
99 99 roots.push(*rev);
100 100 }
101 101 }
102 102 Ok(roots)
103 103 }
104 104
105 105 /// Compute the topological range between two collections of revisions
106 106 ///
107 107 /// This is equivalent to the revset `<roots>::<heads>`.
108 108 ///
109 109 /// Currently, the given `Graph` has to implement `Clone`, which means
110 110 /// actually cloning just a reference-counted Python pointer if
111 111 /// it's passed over through `rust-cpython`. This is due to the internal
112 112 /// use of `AncestorsIterator`
113 113 ///
114 114 /// # Algorithmic details
115 115 ///
116 116 /// This is a two-pass sweep inspired by what `reachableroots2` from
117 117 /// `mercurial.cext.parsers` does to obtain the same results.
118 118 ///
119 119 /// - first, we climb up the DAG from `heads` in topological order, keeping
120 120 ///   them in the `heads_ancestors` vector, and adding any element of
121 121 /// `roots` we find among them to the resulting range.
122 122 /// - Then, we iterate on that recorded vector so that a revision is always
123 123 /// emitted after its parents and add all revisions whose parents are already
124 124 /// in the range to the results.
125 125 ///
126 126 /// # Performance notes
127 127 ///
128 128 /// The main difference with the C implementation is that
129 129 /// the latter uses a flat array with bit flags, instead of complex structures
130 130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
131 131 /// possible that the present implementation could be more memory efficient
132 132 /// for very large repositories with many branches.
133 133 pub fn range(
134 134 graph: &(impl Graph + Clone),
135 135 roots: impl IntoIterator<Item = Revision>,
136 136 heads: impl IntoIterator<Item = Revision>,
137 137 ) -> Result<BTreeSet<Revision>, GraphError> {
138 138 let mut range = BTreeSet::new();
139 139 let roots: HashSet<Revision> = roots.into_iter().collect();
140 140 let min_root: Revision = match roots.iter().cloned().min() {
141 141 None => {
142 142 return Ok(range);
143 143 }
144 144 Some(r) => r,
145 145 };
146 146
147 147 // Internally, AncestorsIterator currently maintains a `HashSet`
148 148 // of all seen revisions, which is also what we record, albeit in an ordered
149 149 // way. There's room for improvement on this duplication.
150 150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
151 151 let mut heads_ancestors: Vec<Revision> = Vec::new();
152 152 for revres in ait {
153 153 let rev = revres?;
154 154 if roots.contains(&rev) {
155 155 range.insert(rev);
156 156 }
157 157 heads_ancestors.push(rev);
158 158 }
159 159
160 160 for rev in heads_ancestors.into_iter().rev() {
161 161 for parent in graph.parents(rev)?.iter() {
162 162 if *parent != NULL_REVISION && range.contains(parent) {
163 163 range.insert(rev);
164 164 }
165 165 }
166 166 }
167 167 Ok(range)
168 168 }
169 169
170 170 #[cfg(test)]
171 171 mod tests {
172 172
173 173 use super::*;
174 174 use crate::{testing::SampleGraph, BaseRevision};
175 175
176 176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
177 177 fn retain_heads_sorted(
178 178 graph: &impl Graph,
179 179 revs: &[BaseRevision],
180 180 ) -> Result<Vec<Revision>, GraphError> {
181 181 let mut revs: HashSet<Revision> =
182 182 revs.iter().cloned().map(Revision).collect();
183 183 retain_heads(graph, &mut revs)?;
184 184 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
185 185 as_vec.sort_unstable();
186 186 Ok(as_vec)
187 187 }
188 188
189 189 #[test]
190 190 fn test_retain_heads() -> Result<(), GraphError> {
191 191 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
192 192 assert_eq!(
193 193 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
194 194 vec![1, 6, 12]
195 195 );
196 196 assert_eq!(
197 197 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
198 198 vec![3, 5, 8, 9]
199 199 );
200 200 Ok(())
201 201 }
202 202
203 203 /// Apply `heads()` to the given slice and return as a sorted `Vec`
204 204 fn heads_sorted(
205 205 graph: &impl Graph,
206 206 revs: &[BaseRevision],
207 207 ) -> Result<Vec<Revision>, GraphError> {
208 let iter_revs: Vec<_> =
209 revs.into_iter().cloned().map(Revision).collect();
208 let iter_revs: Vec<_> = revs.iter().cloned().map(Revision).collect();
210 209 let heads = heads(graph, iter_revs.iter())?;
211 210 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
212 211 as_vec.sort_unstable();
213 212 Ok(as_vec)
214 213 }
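Note on the change in `heads_sorted` above: on a shared slice, `.into_iter()` and `.iter()` are the same iterator (both yield references), so clippy prefers the explicit `.iter()`. A minimal sketch:

    fn main() {
        let revs: &[i32] = &[4, 5, 6];
        // Both calls yield `&i32` items here, so the two vectors are equal;
        // `.iter()` just states the borrowing intent plainly.
        let a: Vec<i32> = revs.iter().cloned().collect();
        let b: Vec<i32> = revs.into_iter().cloned().collect();
        assert_eq!(a, b);
    }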
215 214
216 215 #[test]
217 216 fn test_heads() -> Result<(), GraphError> {
218 217 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
219 218 assert_eq!(
220 219 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
221 220 vec![1, 6, 12]
222 221 );
223 222 assert_eq!(
224 223 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
225 224 vec![3, 5, 8, 9]
226 225 );
227 226 Ok(())
228 227 }
229 228
230 229 /// Apply `roots()` and sort the result for easier comparison
231 230 fn roots_sorted(
232 231 graph: &impl Graph,
233 232 revs: &[BaseRevision],
234 233 ) -> Result<Vec<Revision>, GraphError> {
235 234 let set: HashSet<_> = revs.iter().cloned().map(Revision).collect();
236 235 let mut as_vec = roots(graph, &set)?;
237 236 as_vec.sort_unstable();
238 237 Ok(as_vec)
239 238 }
240 239
241 240 #[test]
242 241 fn test_roots() -> Result<(), GraphError> {
243 242 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
244 243 assert_eq!(
245 244 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
246 245 vec![0, 4, 12]
247 246 );
248 247 assert_eq!(
249 248 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
250 249 vec![1, 8]
251 250 );
252 251 Ok(())
253 252 }
254 253
255 254 /// Apply `range()` and convert the result into a Vec for easier comparison
256 255 fn range_vec(
257 256 graph: impl Graph + Clone,
258 257 roots: &[BaseRevision],
259 258 heads: &[BaseRevision],
260 259 ) -> Result<Vec<Revision>, GraphError> {
261 260 range(
262 261 &graph,
263 262 roots.iter().cloned().map(Revision),
264 263 heads.iter().cloned().map(Revision),
265 264 )
266 265 .map(|bs| bs.into_iter().collect())
267 266 }
268 267
269 268 #[test]
270 269 fn test_range() -> Result<(), GraphError> {
271 270 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
272 271 assert_eq!(
273 272 range_vec(SampleGraph, &[0], &[8])?,
274 273 Vec::<Revision>::new()
275 274 );
276 275 assert_eq!(
277 276 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
278 277 vec![5, 10]
279 278 );
280 279 assert_eq!(
281 280 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
282 281 vec![5, 6, 9, 10, 12]
283 282 );
284 283 Ok(())
285 284 }
286 285 }
@@ -1,1937 +1,1937 b''
1 1 use bytes_cast::BytesCast;
2 2 use std::borrow::Cow;
3 3 use std::path::PathBuf;
4 4
5 5 use super::on_disk;
6 6 use super::on_disk::DirstateV2ParseError;
7 7 use super::owning::OwningDirstateMap;
8 8 use super::path_with_basename::WithBasename;
9 9 use crate::dirstate::parsers::pack_entry;
10 10 use crate::dirstate::parsers::packed_entry_size;
11 11 use crate::dirstate::parsers::parse_dirstate_entries;
12 12 use crate::dirstate::CopyMapIter;
13 13 use crate::dirstate::DirstateV2Data;
14 14 use crate::dirstate::ParentFileData;
15 15 use crate::dirstate::StateMapIter;
16 16 use crate::dirstate::TruncatedTimestamp;
17 17 use crate::matchers::Matcher;
18 18 use crate::utils::filter_map_results;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::DirstateEntry;
21 21 use crate::DirstateError;
22 22 use crate::DirstateMapError;
23 23 use crate::DirstateParents;
24 24 use crate::DirstateStatus;
25 25 use crate::FastHashbrownMap as FastHashMap;
26 26 use crate::PatternFileWarning;
27 27 use crate::StatusError;
28 28 use crate::StatusOptions;
29 29
30 30 /// Append to an existing data file if the amount of unreachable data (not used
31 31 /// anymore) is less than this fraction of the total amount of existing data.
32 32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
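To make the threshold concrete: with a 100 KiB data file and a ratio of 0.5, appending remains acceptable while fewer than 50 KiB of it are unreachable; beyond that, rewriting a fresh data file is preferred. A hedged sketch of such a check (illustrative only; the actual condition lives elsewhere in this module):

    fn append_is_acceptable(unreachable_bytes: u32, total_bytes: u32) -> bool {
        // Keep appending while the garbage stays under the configured fraction.
        (unreachable_bytes as f32)
            < ACCEPTABLE_UNREACHABLE_BYTES_RATIO * (total_bytes as f32)
    }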
33 33
34 34 #[derive(Debug, PartialEq, Eq)]
35 35 /// Version of the on-disk format
36 36 pub enum DirstateVersion {
37 37 V1,
38 38 V2,
39 39 }
40 40
41 41 #[derive(Debug, PartialEq, Eq)]
42 42 pub enum DirstateMapWriteMode {
43 43 Auto,
44 44 ForceNewDataFile,
45 45 ForceAppend,
46 46 }
47 47
48 48 #[derive(Debug)]
49 49 pub struct DirstateMap<'on_disk> {
50 50 /// Contents of the `.hg/dirstate` file
51 51 pub(super) on_disk: &'on_disk [u8],
52 52
53 53 pub(super) root: ChildNodes<'on_disk>,
54 54
55 55 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
56 56 pub(super) nodes_with_entry_count: u32,
57 57
58 58 /// Number of nodes anywhere in the tree that have
59 59 /// `.copy_source.is_some()`.
60 60 pub(super) nodes_with_copy_source_count: u32,
61 61
62 62 /// See on_disk::Header
63 63 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
64 64
65 65 /// How many bytes of `on_disk` are not used anymore
66 66 pub(super) unreachable_bytes: u32,
67 67
68 68 /// Size of the data used to first load this `DirstateMap`. Used in case
69 69 /// we need to write some new metadata, but no new data on disk,
70 70 /// as well as to detect writes that have happened in another process
71 71 /// since first read.
72 72 pub(super) old_data_size: usize,
73 73
74 74 /// UUID used when first loading this `DirstateMap`. Used to check if
75 75 /// the UUID has been changed by another process since first read.
76 76 /// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
77 77 pub(super) old_uuid: Option<Vec<u8>>,
78 78
79 79 /// Identity of the dirstate file (for dirstate-v1) or the docket file
80 80 /// (v2). Used to detect if the file has changed from another process.
81 81 /// Since it's always written atomically, we can compare the inode to
82 82 /// check the file identity.
83 83 ///
84 84 /// TODO On non-Unix systems, something like hashing is a possibility?
85 85 pub(super) identity: Option<u64>,
86 86
87 87 pub(super) dirstate_version: DirstateVersion,
88 88
89 89 /// Controlled by config option `devel.dirstate.v2.data_update_mode`
90 90 pub(super) write_mode: DirstateMapWriteMode,
91 91 }
92 92
93 93 /// Using a plain `HgPathBuf` of the full path from the repository root as a
94 94 /// map key would also work: all paths in a given map have the same parent
95 95 /// path, so comparing full paths gives the same result as comparing base
96 96 /// names. However `HashMap` would waste time always re-hashing the same
97 97 /// string prefix.
98 98 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
99 99
100 100 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
101 101 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
102 102 #[derive(Debug)]
103 103 pub(super) enum BorrowedPath<'tree, 'on_disk> {
104 104 InMemory(&'tree HgPathBuf),
105 105 OnDisk(&'on_disk HgPath),
106 106 }
107 107
108 108 #[derive(Debug)]
109 109 pub(super) enum ChildNodes<'on_disk> {
110 110 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
111 111 OnDisk(&'on_disk [on_disk::Node]),
112 112 }
113 113
114 114 #[derive(Debug)]
115 115 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
116 116 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
117 117 OnDisk(&'on_disk [on_disk::Node]),
118 118 }
119 119
120 120 #[derive(Debug)]
121 121 pub(super) enum NodeRef<'tree, 'on_disk> {
122 122 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
123 123 OnDisk(&'on_disk on_disk::Node),
124 124 }
125 125
126 126 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
127 127 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
128 128 match *self {
129 129 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
130 130 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
131 131 }
132 132 }
133 133 }
134 134
135 135 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
136 136 type Target = HgPath;
137 137
138 138 fn deref(&self) -> &HgPath {
139 139 match *self {
140 140 BorrowedPath::InMemory(in_memory) => in_memory,
141 141 BorrowedPath::OnDisk(on_disk) => on_disk,
142 142 }
143 143 }
144 144 }
145 145
146 146 impl Default for ChildNodes<'_> {
147 147 fn default() -> Self {
148 148 ChildNodes::InMemory(Default::default())
149 149 }
150 150 }
151 151
152 152 impl<'on_disk> ChildNodes<'on_disk> {
153 153 pub(super) fn as_ref<'tree>(
154 154 &'tree self,
155 155 ) -> ChildNodesRef<'tree, 'on_disk> {
156 156 match self {
157 157 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
158 158 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
159 159 }
160 160 }
161 161
162 162 pub(super) fn is_empty(&self) -> bool {
163 163 match self {
164 164 ChildNodes::InMemory(nodes) => nodes.is_empty(),
165 165 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
166 166 }
167 167 }
168 168
169 169 fn make_mut(
170 170 &mut self,
171 171 on_disk: &'on_disk [u8],
172 172 unreachable_bytes: &mut u32,
173 173 ) -> Result<
174 174 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
175 175 DirstateV2ParseError,
176 176 > {
177 177 match self {
178 178 ChildNodes::InMemory(nodes) => Ok(nodes),
179 179 ChildNodes::OnDisk(nodes) => {
180 180 *unreachable_bytes +=
181 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
181 std::mem::size_of_val::<[on_disk::Node]>(*nodes) as u32;
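Note on the change just above: `nodes` is matched out of a reference, so it is itself a reference to the slice reference; dereferencing it once before calling `size_of_val` makes explicit that the slice contents are being measured, which is what clippy's warning about taking the size of a reference (likely `size_of_ref`) asks for. The turbofish had already pinned the measured type, so behavior is unchanged. A small illustration:

    fn main() {
        let data = [0u8; 16];
        let slice: &[u8] = &data;
        let slice_ref: &&[u8] = &slice;
        // Measures the 16 bytes the slice points to:
        assert_eq!(std::mem::size_of_val(slice), 16);
        // Measures the fat pointer itself, not the data behind it:
        assert_eq!(std::mem::size_of_val(slice_ref), std::mem::size_of::<&[u8]>());
    }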
182 182 let nodes = nodes
183 183 .iter()
184 184 .map(|node| {
185 185 Ok((
186 186 node.path(on_disk)?,
187 187 node.to_in_memory_node(on_disk)?,
188 188 ))
189 189 })
190 190 .collect::<Result<_, _>>()?;
191 191 *self = ChildNodes::InMemory(nodes);
192 192 match self {
193 193 ChildNodes::InMemory(nodes) => Ok(nodes),
194 194 ChildNodes::OnDisk(_) => unreachable!(),
195 195 }
196 196 }
197 197 }
198 198 }
199 199 }
200 200
201 201 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
202 202 pub(super) fn get(
203 203 &self,
204 204 base_name: &HgPath,
205 205 on_disk: &'on_disk [u8],
206 206 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
207 207 match self {
208 208 ChildNodesRef::InMemory(nodes) => Ok(nodes
209 209 .get_key_value(base_name)
210 210 .map(|(k, v)| NodeRef::InMemory(k, v))),
211 211 ChildNodesRef::OnDisk(nodes) => {
212 212 let mut parse_result = Ok(());
213 213 let search_result = nodes.binary_search_by(|node| {
214 214 match node.base_name(on_disk) {
215 215 Ok(node_base_name) => node_base_name.cmp(base_name),
216 216 Err(e) => {
217 217 parse_result = Err(e);
218 218 // Dummy comparison result, `search_result` won’t
219 219 // be used since `parse_result` is an error
220 220 std::cmp::Ordering::Equal
221 221 }
222 222 }
223 223 });
224 224 parse_result.map(|()| {
225 225 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
226 226 })
227 227 }
228 228 }
229 229 }
230 230
231 231 /// Iterate in undefined order
232 232 pub(super) fn iter(
233 233 &self,
234 234 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
235 235 match self {
236 236 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
237 237 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
238 238 ),
239 239 ChildNodesRef::OnDisk(nodes) => {
240 240 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
241 241 }
242 242 }
243 243 }
244 244
245 245 /// Iterate in parallel in undefined order
246 246 pub(super) fn par_iter(
247 247 &self,
248 248 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
249 249 {
250 250 use rayon::prelude::*;
251 251 match self {
252 252 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
253 253 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
254 254 ),
255 255 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
256 256 nodes.par_iter().map(NodeRef::OnDisk),
257 257 ),
258 258 }
259 259 }
260 260
261 261 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
262 262 match self {
263 263 ChildNodesRef::InMemory(nodes) => {
264 264 let mut vec: Vec<_> = nodes
265 265 .iter()
266 266 .map(|(k, v)| NodeRef::InMemory(k, v))
267 267 .collect();
268 268 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
269 269 match node {
270 270 NodeRef::InMemory(path, _node) => path.base_name(),
271 271 NodeRef::OnDisk(_) => unreachable!(),
272 272 }
273 273 }
274 274 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
275 275 // value: https://github.com/rust-lang/rust/issues/34162
276 276 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
277 277 vec
278 278 }
279 279 ChildNodesRef::OnDisk(nodes) => {
280 280 // Nodes on disk are already sorted
281 281 nodes.iter().map(NodeRef::OnDisk).collect()
282 282 }
283 283 }
284 284 }
285 285 }
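// Note on ordering: the on-disk format keeps child nodes sorted by base
// name, which is what lets `ChildNodesRef::get` use `binary_search_by`
// above and lets `sorted` return on-disk nodes without re-sorting. A
// hypothetical lookup, assuming a `children: ChildNodesRef` borrowed from
// a map together with that map's raw `on_disk` bytes:
//
//     let maybe_node = children.get(HgPath::new(b"dir"), on_disk)?;
//
// The in-memory variant answers the same call through its hash map
// instead, which is why `iter` documents an undefined order.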
286 286
287 287 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
288 288 pub(super) fn full_path(
289 289 &self,
290 290 on_disk: &'on_disk [u8],
291 291 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
292 292 match self {
293 293 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
294 294 NodeRef::OnDisk(node) => node.full_path(on_disk),
295 295 }
296 296 }
297 297
298 298 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
299 299 /// HgPath>` detached from `'tree`
300 300 pub(super) fn full_path_borrowed(
301 301 &self,
302 302 on_disk: &'on_disk [u8],
303 303 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
304 304 match self {
305 305 NodeRef::InMemory(path, _node) => match path.full_path() {
306 306 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
307 307 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
308 308 },
309 309 NodeRef::OnDisk(node) => {
310 310 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
311 311 }
312 312 }
313 313 }
314 314
315 315 pub(super) fn base_name(
316 316 &self,
317 317 on_disk: &'on_disk [u8],
318 318 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
319 319 match self {
320 320 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
321 321 NodeRef::OnDisk(node) => node.base_name(on_disk),
322 322 }
323 323 }
324 324
325 325 pub(super) fn children(
326 326 &self,
327 327 on_disk: &'on_disk [u8],
328 328 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
329 329 match self {
330 330 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
331 331 NodeRef::OnDisk(node) => {
332 332 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
333 333 }
334 334 }
335 335 }
336 336
337 337 pub(super) fn has_copy_source(&self) -> bool {
338 338 match self {
339 339 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
340 340 NodeRef::OnDisk(node) => node.has_copy_source(),
341 341 }
342 342 }
343 343
344 344 pub(super) fn copy_source(
345 345 &self,
346 346 on_disk: &'on_disk [u8],
347 347 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
348 348 match self {
349 349 NodeRef::InMemory(_path, node) => Ok(node.copy_source.as_deref()),
350 350 NodeRef::OnDisk(node) => node.copy_source(on_disk),
351 351 }
352 352 }
353 353 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
354 354 /// HgPath>` detached from `'tree`
355 355 pub(super) fn copy_source_borrowed(
356 356 &self,
357 357 on_disk: &'on_disk [u8],
358 358 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
359 359 {
360 360 Ok(match self {
361 361 NodeRef::InMemory(_path, node) => {
362 362 node.copy_source.as_ref().map(|source| match source {
363 363 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
364 364 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
365 365 })
366 366 }
367 367 NodeRef::OnDisk(node) => {
368 368 node.copy_source(on_disk)?.map(BorrowedPath::OnDisk)
369 369 }
370 370 })
371 371 }
372 372
373 373 pub(super) fn entry(
374 374 &self,
375 375 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
376 376 match self {
377 377 NodeRef::InMemory(_path, node) => {
378 378 Ok(node.data.as_entry().copied())
379 379 }
380 380 NodeRef::OnDisk(node) => node.entry(),
381 381 }
382 382 }
383 383
384 384 pub(super) fn cached_directory_mtime(
385 385 &self,
386 386 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
387 387 match self {
388 388 NodeRef::InMemory(_path, node) => Ok(match node.data {
389 389 NodeData::CachedDirectory { mtime } => Some(mtime),
390 390 _ => None,
391 391 }),
392 392 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
393 393 }
394 394 }
395 395
396 396 pub(super) fn descendants_with_entry_count(&self) -> u32 {
397 397 match self {
398 398 NodeRef::InMemory(_path, node) => {
399 399 node.descendants_with_entry_count
400 400 }
401 401 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
402 402 }
403 403 }
404 404
405 405 pub(super) fn tracked_descendants_count(&self) -> u32 {
406 406 match self {
407 407 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
408 408 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
409 409 }
410 410 }
411 411 }
412 412
413 413 /// Represents a file or a directory
414 414 #[derive(Default, Debug)]
415 415 pub(super) struct Node<'on_disk> {
416 416 pub(super) data: NodeData,
417 417
418 418 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
419 419
420 420 pub(super) children: ChildNodes<'on_disk>,
421 421
422 422 /// How many (non-inclusive) descendants of this node have an entry.
423 423 pub(super) descendants_with_entry_count: u32,
424 424
425 425 /// How many (non-inclusive) descendants of this node have an entry whose
426 426 /// state is "tracked".
427 427 pub(super) tracked_descendants_count: u32,
428 428 }
429 429
430 430 #[derive(Debug)]
431 431 pub(super) enum NodeData {
432 432 Entry(DirstateEntry),
433 433 CachedDirectory { mtime: TruncatedTimestamp },
434 434 None,
435 435 }
436 436
437 437 impl Default for NodeData {
438 438 fn default() -> Self {
439 439 NodeData::None
440 440 }
441 441 }
442 442
443 443 impl NodeData {
444 444 fn has_entry(&self) -> bool {
445 445 matches!(self, NodeData::Entry(_))
446 446 }
447 447
448 448 fn as_entry(&self) -> Option<&DirstateEntry> {
449 449 match self {
450 450 NodeData::Entry(entry) => Some(entry),
451 451 _ => None,
452 452 }
453 453 }
454 454
455 455 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
456 456 match self {
457 457 NodeData::Entry(entry) => Some(entry),
458 458 _ => None,
459 459 }
460 460 }
461 461 }
462 462
463 463 impl<'on_disk> DirstateMap<'on_disk> {
464 464 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
465 465 Self {
466 466 on_disk,
467 467 root: ChildNodes::default(),
468 468 nodes_with_entry_count: 0,
469 469 nodes_with_copy_source_count: 0,
470 470 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
471 471 unreachable_bytes: 0,
472 472 old_data_size: 0,
473 473 old_uuid: None,
474 474 identity: None,
475 475 dirstate_version: DirstateVersion::V1,
476 476 write_mode: DirstateMapWriteMode::Auto,
477 477 }
478 478 }
479 479
480 480 #[logging_timer::time("trace")]
481 481 pub fn new_v2(
482 482 on_disk: &'on_disk [u8],
483 483 data_size: usize,
484 484 metadata: &[u8],
485 485 uuid: Vec<u8>,
486 486 identity: Option<u64>,
487 487 ) -> Result<Self, DirstateError> {
488 488 if let Some(data) = on_disk.get(..data_size) {
489 489 Ok(on_disk::read(data, metadata, uuid, identity)?)
490 490 } else {
491 491 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
492 492 }
493 493 }
494 494
495 495 #[logging_timer::time("trace")]
496 496 pub fn new_v1(
497 497 on_disk: &'on_disk [u8],
498 498 identity: Option<u64>,
499 499 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
500 500 let mut map = Self::empty(on_disk);
501 501 if map.on_disk.is_empty() {
502 502 return Ok((map, None));
503 503 }
504 504
505 505 let parents = parse_dirstate_entries(
506 506 map.on_disk,
507 507 |path, entry, copy_source| {
508 508 let tracked = entry.tracked();
509 509 let node = Self::get_or_insert_node_inner(
510 510 map.on_disk,
511 511 &mut map.unreachable_bytes,
512 512 &mut map.root,
513 513 path,
514 514 WithBasename::to_cow_borrowed,
515 515 |ancestor| {
516 516 if tracked {
517 517 ancestor.tracked_descendants_count += 1
518 518 }
519 519 ancestor.descendants_with_entry_count += 1
520 520 },
521 521 )?;
522 522 assert!(
523 523 !node.data.has_entry(),
524 524 "duplicate dirstate entry in read"
525 525 );
526 526 assert!(
527 527 node.copy_source.is_none(),
528 528 "duplicate dirstate entry in read"
529 529 );
530 530 node.data = NodeData::Entry(*entry);
531 531 node.copy_source = copy_source.map(Cow::Borrowed);
532 532 map.nodes_with_entry_count += 1;
533 533 if copy_source.is_some() {
534 534 map.nodes_with_copy_source_count += 1
535 535 }
536 536 Ok(())
537 537 },
538 538 )?;
539 539 let parents = Some(*parents);
540 540 map.identity = identity;
541 541
542 542 Ok((map, parents))
543 543 }
544 544
545 545 /// Assuming dirstate-v2 format, returns whether the next write should
546 546 /// append to the existing data file that contains `self.on_disk` (true),
547 547 /// or create a new data file from scratch (false).
548 548 pub(super) fn write_should_append(&self) -> bool {
549 549 match self.write_mode {
550 550 DirstateMapWriteMode::ForceAppend => true,
551 551 DirstateMapWriteMode::ForceNewDataFile => false,
552 552 DirstateMapWriteMode::Auto => {
553 553 let ratio =
554 554 self.unreachable_bytes as f32 / self.on_disk.len() as f32;
555 555 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
556 556 }
557 557 }
558 558 }
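    // A quick sketch of the `Auto` decision with made-up numbers: if the
    // existing data file is 1000 bytes long and 300 of those bytes have
    // become unreachable, the ratio is 0.3, and the next write appends
    // only if 0.3 is still below `ACCEPTABLE_UNREACHABLE_BYTES_RATIO`
    // (a constant defined elsewhere); otherwise a fresh data file is
    // written and the garbage bytes are dropped.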
559 559
560 560 fn get_node<'tree>(
561 561 &'tree self,
562 562 path: &HgPath,
563 563 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
564 564 let mut children = self.root.as_ref();
565 565 let mut components = path.components();
566 566 let mut component =
 567 567 components.next().expect("expected at least one component");
568 568 loop {
569 569 if let Some(child) = children.get(component, self.on_disk)? {
570 570 if let Some(next_component) = components.next() {
571 571 component = next_component;
572 572 children = child.children(self.on_disk)?;
573 573 } else {
574 574 return Ok(Some(child));
575 575 }
576 576 } else {
577 577 return Ok(None);
578 578 }
579 579 }
580 580 }
581 581
582 582 pub fn has_node(
583 583 &self,
584 584 path: &HgPath,
585 585 ) -> Result<bool, DirstateV2ParseError> {
586 586 let node = self.get_node(path)?;
587 587 Ok(node.is_some())
588 588 }
589 589
590 590 /// Returns a mutable reference to the node at `path` if it exists
591 591 ///
592 592 /// `each_ancestor` is a callback that is called for each ancestor node
593 593 /// when descending the tree. It is used to keep the different counters
594 594 /// of the `DirstateMap` up-to-date.
595 595 fn get_node_mut<'tree>(
596 596 &'tree mut self,
597 597 path: &HgPath,
598 598 each_ancestor: impl FnMut(&mut Node),
599 599 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
600 600 Self::get_node_mut_inner(
601 601 self.on_disk,
602 602 &mut self.unreachable_bytes,
603 603 &mut self.root,
604 604 path,
605 605 each_ancestor,
606 606 )
607 607 }
608 608
609 609 /// Lower-level version of `get_node_mut`.
610 610 ///
611 611 /// This takes `root` instead of `&mut self` so that callers can mutate
612 612 /// other fields while the returned borrow is still valid.
613 613 ///
614 614 /// `each_ancestor` is a callback that is called for each ancestor node
615 615 /// when descending the tree. It is used to keep the different counters
616 616 /// of the `DirstateMap` up-to-date.
617 617 fn get_node_mut_inner<'tree>(
618 618 on_disk: &'on_disk [u8],
619 619 unreachable_bytes: &mut u32,
620 620 root: &'tree mut ChildNodes<'on_disk>,
621 621 path: &HgPath,
622 622 mut each_ancestor: impl FnMut(&mut Node),
623 623 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
624 624 let mut children = root;
625 625 let mut components = path.components();
626 626 let mut component =
 627 627 components.next().expect("expected at least one component");
628 628 loop {
629 629 if let Some(child) = children
630 630 .make_mut(on_disk, unreachable_bytes)?
631 631 .get_mut(component)
632 632 {
633 633 if let Some(next_component) = components.next() {
634 634 each_ancestor(child);
635 635 component = next_component;
636 636 children = &mut child.children;
637 637 } else {
638 638 return Ok(Some(child));
639 639 }
640 640 } else {
641 641 return Ok(None);
642 642 }
643 643 }
644 644 }
645 645
646 646 /// Get a mutable reference to the node at `path`, creating it if it does
647 647 /// not exist.
648 648 ///
649 649 /// `each_ancestor` is a callback that is called for each ancestor node
650 650 /// when descending the tree. It is used to keep the different counters
651 651 /// of the `DirstateMap` up-to-date.
652 652 fn get_or_insert_node<'tree, 'path>(
653 653 &'tree mut self,
654 654 path: &'path HgPath,
655 655 each_ancestor: impl FnMut(&mut Node),
656 656 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
657 657 Self::get_or_insert_node_inner(
658 658 self.on_disk,
659 659 &mut self.unreachable_bytes,
660 660 &mut self.root,
661 661 path,
662 662 WithBasename::to_cow_owned,
663 663 each_ancestor,
664 664 )
665 665 }
666 666
 667 667 /// Lower-level version of `get_or_insert_node`, used when parsing
 668 668 /// disk data to avoid extra allocations for new nodes.
669 669 fn get_or_insert_node_inner<'tree, 'path>(
670 670 on_disk: &'on_disk [u8],
671 671 unreachable_bytes: &mut u32,
672 672 root: &'tree mut ChildNodes<'on_disk>,
673 673 path: &'path HgPath,
674 674 to_cow: impl Fn(
675 675 WithBasename<&'path HgPath>,
676 676 ) -> WithBasename<Cow<'on_disk, HgPath>>,
677 677 mut each_ancestor: impl FnMut(&mut Node),
678 678 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
679 679 let mut child_nodes = root;
680 680 let mut inclusive_ancestor_paths =
681 681 WithBasename::inclusive_ancestors_of(path);
682 682 let mut ancestor_path = inclusive_ancestor_paths
683 683 .next()
684 684 .expect("expected at least one inclusive ancestor");
685 685 loop {
686 686 let (_, child_node) = child_nodes
687 687 .make_mut(on_disk, unreachable_bytes)?
688 688 .raw_entry_mut()
689 689 .from_key(ancestor_path.base_name())
690 690 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
691 691 if let Some(next) = inclusive_ancestor_paths.next() {
692 692 each_ancestor(child_node);
693 693 ancestor_path = next;
694 694 child_nodes = &mut child_node.children;
695 695 } else {
696 696 return Ok(child_node);
697 697 }
698 698 }
699 699 }
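    // Rough illustration of the ancestor walk above, assuming the path
    // `some/nested/file` and an initially empty tree: the loop visits the
    // inclusive ancestors `some`, `some/nested` and `some/nested/file` in
    // order, inserting a default `Node` for each missing one, calls
    // `each_ancestor` on `some` and `some/nested` only, and finally
    // returns a mutable reference to the `some/nested/file` node.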
700 700
701 701 #[allow(clippy::too_many_arguments)]
702 702 fn reset_state(
703 703 &mut self,
704 704 filename: &HgPath,
705 705 old_entry_opt: Option<DirstateEntry>,
706 706 wc_tracked: bool,
707 707 p1_tracked: bool,
708 708 p2_info: bool,
709 709 has_meaningful_mtime: bool,
710 710 parent_file_data_opt: Option<ParentFileData>,
711 711 ) -> Result<(), DirstateError> {
712 712 let (had_entry, was_tracked) = match old_entry_opt {
713 713 Some(old_entry) => (true, old_entry.tracked()),
714 714 None => (false, false),
715 715 };
716 716 let node = self.get_or_insert_node(filename, |ancestor| {
717 717 if !had_entry {
718 718 ancestor.descendants_with_entry_count += 1;
719 719 }
720 720 if was_tracked {
721 721 if !wc_tracked {
722 722 ancestor.tracked_descendants_count = ancestor
723 723 .tracked_descendants_count
724 724 .checked_sub(1)
725 725 .expect("tracked count to be >= 0");
726 726 }
727 727 } else if wc_tracked {
728 728 ancestor.tracked_descendants_count += 1;
729 729 }
730 730 })?;
731 731
732 732 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
733 733 DirstateV2Data {
734 734 wc_tracked,
735 735 p1_tracked,
736 736 p2_info,
737 737 mode_size: parent_file_data.mode_size,
738 738 mtime: if has_meaningful_mtime {
739 739 parent_file_data.mtime
740 740 } else {
741 741 None
742 742 },
743 743 ..Default::default()
744 744 }
745 745 } else {
746 746 DirstateV2Data {
747 747 wc_tracked,
748 748 p1_tracked,
749 749 p2_info,
750 750 ..Default::default()
751 751 }
752 752 };
753 753 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
754 754 if !had_entry {
755 755 self.nodes_with_entry_count += 1;
756 756 }
757 757 Ok(())
758 758 }
759 759
760 760 fn set_tracked(
761 761 &mut self,
762 762 filename: &HgPath,
763 763 old_entry_opt: Option<DirstateEntry>,
764 764 ) -> Result<bool, DirstateV2ParseError> {
765 765 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
766 766 let had_entry = old_entry_opt.is_some();
767 let tracked_count_increment = if was_tracked { 0 } else { 1 };
767 let tracked_count_increment = u32::from(!was_tracked);
768 768 let mut new = false;
769 769
770 770 let node = self.get_or_insert_node(filename, |ancestor| {
771 771 if !had_entry {
772 772 ancestor.descendants_with_entry_count += 1;
773 773 }
774 774
775 775 ancestor.tracked_descendants_count += tracked_count_increment;
776 776 })?;
777 777 if let Some(old_entry) = old_entry_opt {
778 778 let mut e = old_entry;
779 779 if e.tracked() {
780 780 // XXX
 781 781 // This is probably overkill for most cases, but we need it to
 782 782 // fully replace the `normallookup` call with the `set_tracked`
 783 783 // one. Consider smoothing this out in the future.
784 784 e.set_possibly_dirty();
785 785 } else {
786 786 new = true;
787 787 e.set_tracked();
788 788 }
789 789 node.data = NodeData::Entry(e)
790 790 } else {
791 791 node.data = NodeData::Entry(DirstateEntry::new_tracked());
792 792 self.nodes_with_entry_count += 1;
793 793 new = true;
794 794 };
795 795 Ok(new)
796 796 }
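    // Example of the bookkeeping, assuming a fresh map: tracking
    // `some/nested/path` creates three nodes and bumps both
    // `descendants_with_entry_count` and `tracked_descendants_count` by
    // one on `some` and on `some/nested`; the tracked counters are what
    // `test_tracked_descendants_simple` below asserts.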
797 797
798 798 /// Set a node as untracked in the dirstate.
799 799 ///
800 800 /// It is the responsibility of the caller to remove the copy source and/or
801 801 /// the entry itself if appropriate.
802 802 ///
803 803 /// # Panics
804 804 ///
805 805 /// Panics if the node does not exist.
806 806 fn set_untracked(
807 807 &mut self,
808 808 filename: &HgPath,
809 809 old_entry: DirstateEntry,
810 810 ) -> Result<(), DirstateV2ParseError> {
811 811 let node = self
812 812 .get_node_mut(filename, |ancestor| {
813 813 ancestor.tracked_descendants_count = ancestor
814 814 .tracked_descendants_count
815 815 .checked_sub(1)
816 816 .expect("tracked_descendants_count should be >= 0");
817 817 })?
818 818 .expect("node should exist");
819 819 let mut new_entry = old_entry;
820 820 new_entry.set_untracked();
821 821 node.data = NodeData::Entry(new_entry);
822 822 Ok(())
823 823 }
824 824
825 825 /// Set a node as clean in the dirstate.
826 826 ///
827 827 /// It is the responsibility of the caller to remove the copy source.
828 828 ///
829 829 /// # Panics
830 830 ///
831 831 /// Panics if the node does not exist.
832 832 fn set_clean(
833 833 &mut self,
834 834 filename: &HgPath,
835 835 old_entry: DirstateEntry,
836 836 mode: u32,
837 837 size: u32,
838 838 mtime: TruncatedTimestamp,
839 839 ) -> Result<(), DirstateError> {
840 840 let node = self
841 841 .get_node_mut(filename, |ancestor| {
842 842 if !old_entry.tracked() {
843 843 ancestor.tracked_descendants_count += 1;
844 844 }
845 845 })?
846 846 .expect("node should exist");
847 847 let mut new_entry = old_entry;
848 848 new_entry.set_clean(mode, size, mtime);
849 849 node.data = NodeData::Entry(new_entry);
850 850 Ok(())
851 851 }
852 852
853 853 /// Set a node as possibly dirty in the dirstate.
854 854 ///
855 855 /// # Panics
856 856 ///
857 857 /// Panics if the node does not exist.
858 858 fn set_possibly_dirty(
859 859 &mut self,
860 860 filename: &HgPath,
861 861 ) -> Result<(), DirstateError> {
862 862 let node = self
863 863 .get_node_mut(filename, |_ancestor| {})?
864 864 .expect("node should exist");
865 865 let entry = node.data.as_entry_mut().expect("entry should exist");
866 866 entry.set_possibly_dirty();
867 867 node.data = NodeData::Entry(*entry);
868 868 Ok(())
869 869 }
870 870
871 871 /// Clears the cached mtime for the (potential) folder at `path`.
872 872 pub(super) fn clear_cached_mtime(
873 873 &mut self,
874 874 path: &HgPath,
875 875 ) -> Result<(), DirstateV2ParseError> {
876 876 let node = match self.get_node_mut(path, |_ancestor| {})? {
877 877 Some(node) => node,
878 878 None => return Ok(()),
879 879 };
880 880 if let NodeData::CachedDirectory { .. } = &node.data {
881 881 node.data = NodeData::None
882 882 }
883 883 Ok(())
884 884 }
885 885
886 886 /// Sets the cached mtime for the (potential) folder at `path`.
887 887 pub(super) fn set_cached_mtime(
888 888 &mut self,
889 889 path: &HgPath,
890 890 mtime: TruncatedTimestamp,
891 891 ) -> Result<(), DirstateV2ParseError> {
892 892 let node = match self.get_node_mut(path, |_ancestor| {})? {
893 893 Some(node) => node,
894 894 None => return Ok(()),
895 895 };
896 896 match &node.data {
897 897 NodeData::Entry(_) => {} // Don’t overwrite an entry
898 898 NodeData::CachedDirectory { .. } | NodeData::None => {
899 899 node.data = NodeData::CachedDirectory { mtime }
900 900 }
901 901 }
902 902 Ok(())
903 903 }
904 904
905 905 fn iter_nodes<'tree>(
906 906 &'tree self,
907 907 ) -> impl Iterator<
908 908 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
909 909 > + 'tree {
910 910 // Depth first tree traversal.
911 911 //
912 912 // If we could afford internal iteration and recursion,
913 913 // this would look like:
914 914 //
915 915 // ```
916 916 // fn traverse_children(
917 917 // children: &ChildNodes,
918 918 // each: &mut impl FnMut(&Node),
919 919 // ) {
920 920 // for child in children.values() {
921 921 // traverse_children(&child.children, each);
922 922 // each(child);
923 923 // }
924 924 // }
925 925 // ```
926 926 //
927 927 // However we want an external iterator and therefore can’t use the
928 928 // call stack. Use an explicit stack instead:
929 929 let mut stack = Vec::new();
930 930 let mut iter = self.root.as_ref().iter();
931 931 std::iter::from_fn(move || {
932 932 while let Some(child_node) = iter.next() {
933 933 let children = match child_node.children(self.on_disk) {
934 934 Ok(children) => children,
935 935 Err(error) => return Some(Err(error)),
936 936 };
937 937 // Pseudo-recursion
938 938 let new_iter = children.iter();
939 939 let old_iter = std::mem::replace(&mut iter, new_iter);
940 940 stack.push((child_node, old_iter));
941 941 }
942 942 // Found the end of a `children.iter()` iterator.
943 943 if let Some((child_node, next_iter)) = stack.pop() {
944 944 // "Return" from pseudo-recursion by restoring state from the
945 945 // explicit stack
946 946 iter = next_iter;
947 947
948 948 Some(Ok(child_node))
949 949 } else {
950 950 // Reached the bottom of the stack, we’re done
951 951 None
952 952 }
953 953 })
954 954 }
955 955
956 956 fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow<HgPath>) {
957 957 if let Cow::Borrowed(path) = path {
958 958 *unreachable_bytes += path.len() as u32
959 959 }
960 960 }
961 961
962 962 pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
963 963 self.write_mode = write_mode;
964 964 }
965 965 }
966 966
967 967 type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32));
968 968
969 969 impl OwningDirstateMap {
970 970 pub fn clear(&mut self) {
971 971 self.with_dmap_mut(|map| {
972 972 map.root = Default::default();
973 973 map.nodes_with_entry_count = 0;
974 974 map.nodes_with_copy_source_count = 0;
975 975 map.unreachable_bytes = map.on_disk.len() as u32;
976 976 });
977 977 }
978 978
979 979 pub fn set_tracked(
980 980 &mut self,
981 981 filename: &HgPath,
982 982 ) -> Result<bool, DirstateV2ParseError> {
983 983 let old_entry_opt = self.get(filename)?;
984 984 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
985 985 }
986 986
987 987 pub fn set_untracked(
988 988 &mut self,
989 989 filename: &HgPath,
990 990 ) -> Result<bool, DirstateError> {
991 991 let old_entry_opt = self.get(filename)?;
992 992 match old_entry_opt {
993 993 None => Ok(false),
994 994 Some(old_entry) => {
995 995 if !old_entry.tracked() {
 996 996 // `DirstateMap::set_untracked` is not a no-op when the
 997 997 // entry is already untracked: it would still decrement
 998 998 // the tracked counters on the way down, so return early.
999 999 return Ok(true);
1000 1000 }
1001 1001 if old_entry.added() {
 1002 1002 // Untracking an "added" entry would just leave a
 1003 1003 // worthless entry behind (and other parts of the code
 1004 1004 // would complain about it), so drop it entirely.
1005 1005 self.drop_entry_and_copy_source(filename)?;
1006 1006 return Ok(true);
1007 1007 }
1008 1008 if !old_entry.p2_info() {
1009 1009 self.copy_map_remove(filename)?;
1010 1010 }
1011 1011
1012 1012 self.with_dmap_mut(|map| {
1013 1013 map.set_untracked(filename, old_entry)?;
1014 1014 Ok(true)
1015 1015 })
1016 1016 }
1017 1017 }
1018 1018 }
1019 1019
1020 1020 pub fn set_clean(
1021 1021 &mut self,
1022 1022 filename: &HgPath,
1023 1023 mode: u32,
1024 1024 size: u32,
1025 1025 mtime: TruncatedTimestamp,
1026 1026 ) -> Result<(), DirstateError> {
1027 1027 let old_entry = match self.get(filename)? {
1028 1028 None => {
1029 1029 return Err(
1030 1030 DirstateMapError::PathNotFound(filename.into()).into()
1031 1031 )
1032 1032 }
1033 1033 Some(e) => e,
1034 1034 };
1035 1035 self.copy_map_remove(filename)?;
1036 1036 self.with_dmap_mut(|map| {
1037 1037 map.set_clean(filename, old_entry, mode, size, mtime)
1038 1038 })
1039 1039 }
1040 1040
1041 1041 pub fn set_possibly_dirty(
1042 1042 &mut self,
1043 1043 filename: &HgPath,
1044 1044 ) -> Result<(), DirstateError> {
1045 1045 if self.get(filename)?.is_none() {
1046 1046 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1047 1047 }
1048 1048 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1049 1049 }
1050 1050
1051 1051 pub fn reset_state(
1052 1052 &mut self,
1053 1053 filename: &HgPath,
1054 1054 wc_tracked: bool,
1055 1055 p1_tracked: bool,
1056 1056 p2_info: bool,
1057 1057 has_meaningful_mtime: bool,
1058 1058 parent_file_data_opt: Option<ParentFileData>,
1059 1059 ) -> Result<(), DirstateError> {
1060 1060 if !(p1_tracked || p2_info || wc_tracked) {
1061 1061 self.drop_entry_and_copy_source(filename)?;
1062 1062 return Ok(());
1063 1063 }
1064 1064 self.copy_map_remove(filename)?;
1065 1065 let old_entry_opt = self.get(filename)?;
1066 1066 self.with_dmap_mut(|map| {
1067 1067 map.reset_state(
1068 1068 filename,
1069 1069 old_entry_opt,
1070 1070 wc_tracked,
1071 1071 p1_tracked,
1072 1072 p2_info,
1073 1073 has_meaningful_mtime,
1074 1074 parent_file_data_opt,
1075 1075 )
1076 1076 })
1077 1077 }
1078 1078
1079 1079 pub fn drop_entry_and_copy_source(
1080 1080 &mut self,
1081 1081 filename: &HgPath,
1082 1082 ) -> Result<(), DirstateError> {
1083 1083 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
1084 1084 struct Dropped {
1085 1085 was_tracked: bool,
1086 1086 had_entry: bool,
1087 1087 had_copy_source: bool,
1088 1088 }
1089 1089
1090 1090 /// If this returns `Ok(Some((dropped, removed)))`, then
1091 1091 ///
1092 1092 /// * `dropped` is about the leaf node that was at `filename`
1093 1093 /// * `removed` is whether this particular level of recursion just
1094 1094 /// removed a node in `nodes`.
1095 1095 fn recur<'on_disk>(
1096 1096 on_disk: &'on_disk [u8],
1097 1097 unreachable_bytes: &mut u32,
1098 1098 nodes: &mut ChildNodes<'on_disk>,
1099 1099 path: &HgPath,
1100 1100 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1101 1101 let (first_path_component, rest_of_path) =
1102 1102 path.split_first_component();
1103 1103 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1104 1104 let node = if let Some(node) = nodes.get_mut(first_path_component)
1105 1105 {
1106 1106 node
1107 1107 } else {
1108 1108 return Ok(None);
1109 1109 };
1110 1110 let dropped;
1111 1111 if let Some(rest) = rest_of_path {
1112 1112 if let Some((d, removed)) = recur(
1113 1113 on_disk,
1114 1114 unreachable_bytes,
1115 1115 &mut node.children,
1116 1116 rest,
1117 1117 )? {
1118 1118 dropped = d;
1119 1119 if dropped.had_entry {
1120 1120 node.descendants_with_entry_count = node
1121 1121 .descendants_with_entry_count
1122 1122 .checked_sub(1)
1123 1123 .expect(
1124 1124 "descendants_with_entry_count should be >= 0",
1125 1125 );
1126 1126 }
1127 1127 if dropped.was_tracked {
1128 1128 node.tracked_descendants_count = node
1129 1129 .tracked_descendants_count
1130 1130 .checked_sub(1)
1131 1131 .expect(
1132 1132 "tracked_descendants_count should be >= 0",
1133 1133 );
1134 1134 }
1135 1135
1136 1136 // Directory caches must be invalidated when removing a
1137 1137 // child node
1138 1138 if removed {
1139 1139 if let NodeData::CachedDirectory { .. } = &node.data {
1140 1140 node.data = NodeData::None
1141 1141 }
1142 1142 }
1143 1143 } else {
1144 1144 return Ok(None);
1145 1145 }
1146 1146 } else {
1147 1147 let entry = node.data.as_entry();
1148 1148 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1149 1149 let had_entry = entry.is_some();
1150 1150 if had_entry {
1151 1151 node.data = NodeData::None
1152 1152 }
1153 1153 let mut had_copy_source = false;
1154 1154 if let Some(source) = &node.copy_source {
1155 1155 DirstateMap::count_dropped_path(
1156 1156 unreachable_bytes,
1157 1157 Cow::Borrowed(source),
1158 1158 );
1159 1159 had_copy_source = true;
1160 1160 node.copy_source = None
1161 1161 }
1162 1162 dropped = Dropped {
1163 1163 was_tracked,
1164 1164 had_entry,
1165 1165 had_copy_source,
1166 1166 };
1167 1167 }
1168 1168 // After recursion, for both leaf (rest_of_path is None) nodes and
1169 1169 // parent nodes, remove a node if it just became empty.
1170 1170 let remove = !node.data.has_entry()
1171 1171 && node.copy_source.is_none()
1172 1172 && node.children.is_empty();
1173 1173 if remove {
1174 1174 let (key, _) =
1175 1175 nodes.remove_entry(first_path_component).unwrap();
1176 1176 DirstateMap::count_dropped_path(
1177 1177 unreachable_bytes,
1178 1178 Cow::Borrowed(key.full_path()),
1179 1179 )
1180 1180 }
1181 1181 Ok(Some((dropped, remove)))
1182 1182 }
1183 1183
1184 1184 self.with_dmap_mut(|map| {
1185 1185 if let Some((dropped, _removed)) = recur(
1186 1186 map.on_disk,
1187 1187 &mut map.unreachable_bytes,
1188 1188 &mut map.root,
1189 1189 filename,
1190 1190 )? {
1191 1191 if dropped.had_entry {
1192 1192 map.nodes_with_entry_count = map
1193 1193 .nodes_with_entry_count
1194 1194 .checked_sub(1)
1195 1195 .expect("nodes_with_entry_count should be >= 0");
1196 1196 }
1197 1197 if dropped.had_copy_source {
1198 1198 map.nodes_with_copy_source_count = map
1199 1199 .nodes_with_copy_source_count
1200 1200 .checked_sub(1)
1201 1201 .expect("nodes_with_copy_source_count should be >= 0");
1202 1202 }
1203 1203 } else {
1204 1204 debug_assert!(!was_tracked);
1205 1205 }
1206 1206 Ok(())
1207 1207 })
1208 1208 }
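    // Sketch of the cascade with a map holding only the entry `a/b/c`:
    // dropping it clears the leaf's data, then each level of `recur` on
    // the way back up sees a node with no entry, no copy source and no
    // remaining children and removes it, so `a/b` and `a` disappear too;
    // `nodes_with_entry_count` is decremented once and every removed
    // path goes through `count_dropped_path` to keep `unreachable_bytes`
    // up to date.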
1209 1209
1210 1210 pub fn has_tracked_dir(
1211 1211 &mut self,
1212 1212 directory: &HgPath,
1213 1213 ) -> Result<bool, DirstateError> {
1214 1214 self.with_dmap_mut(|map| {
1215 1215 if let Some(node) = map.get_node(directory)? {
1216 1216 // A node without a `DirstateEntry` was created to hold child
1217 1217 // nodes, and is therefore a directory.
1218 1218 let is_dir = node.entry()?.is_none();
1219 1219 Ok(is_dir && node.tracked_descendants_count() > 0)
1220 1220 } else {
1221 1221 Ok(false)
1222 1222 }
1223 1223 })
1224 1224 }
1225 1225
1226 1226 pub fn has_dir(
1227 1227 &mut self,
1228 1228 directory: &HgPath,
1229 1229 ) -> Result<bool, DirstateError> {
1230 1230 self.with_dmap_mut(|map| {
1231 1231 if let Some(node) = map.get_node(directory)? {
1232 1232 // A node without a `DirstateEntry` was created to hold child
1233 1233 // nodes, and is therefore a directory.
1234 1234 let is_dir = node.entry()?.is_none();
1235 1235 Ok(is_dir && node.descendants_with_entry_count() > 0)
1236 1236 } else {
1237 1237 Ok(false)
1238 1238 }
1239 1239 })
1240 1240 }
1241 1241
1242 1242 #[logging_timer::time("trace")]
1243 1243 pub fn pack_v1(
1244 1244 &self,
1245 1245 parents: DirstateParents,
1246 1246 ) -> Result<Vec<u8>, DirstateError> {
1247 1247 let map = self.get_map();
 1248 1248 // Optimization (to be measured?): pre-compute size to avoid `Vec`
1249 1249 // reallocations
1250 1250 let mut size = parents.as_bytes().len();
1251 1251 for node in map.iter_nodes() {
1252 1252 let node = node?;
1253 1253 if node.entry()?.is_some() {
1254 1254 size += packed_entry_size(
1255 1255 node.full_path(map.on_disk)?,
1256 1256 node.copy_source(map.on_disk)?,
1257 1257 );
1258 1258 }
1259 1259 }
1260 1260
1261 1261 let mut packed = Vec::with_capacity(size);
1262 1262 packed.extend(parents.as_bytes());
1263 1263
1264 1264 for node in map.iter_nodes() {
1265 1265 let node = node?;
1266 1266 if let Some(entry) = node.entry()? {
1267 1267 pack_entry(
1268 1268 node.full_path(map.on_disk)?,
1269 1269 &entry,
1270 1270 node.copy_source(map.on_disk)?,
1271 1271 &mut packed,
1272 1272 );
1273 1273 }
1274 1274 }
1275 1275 Ok(packed)
1276 1276 }
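    // Shape of the v1 output assembled above: the serialized parents come
    // first, followed by one packed record per node that has an entry,
    // each record carrying the entry, its full path and, if present, its
    // copy source; the first pass only exists to size the `Vec` up front.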
1277 1277
1278 1278 /// Returns new data and metadata together with whether that data should be
1279 1279 /// appended to the existing data file whose content is at
1280 1280 /// `map.on_disk` (true), instead of written to a new data file
1281 1281 /// (false), and the previous size of data on disk.
1282 1282 #[logging_timer::time("trace")]
1283 1283 pub fn pack_v2(
1284 1284 &self,
1285 1285 write_mode: DirstateMapWriteMode,
1286 1286 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1287 1287 {
1288 1288 let map = self.get_map();
1289 1289 on_disk::write(map, write_mode)
1290 1290 }
1291 1291
1292 1292 /// `callback` allows the caller to process and do something with the
1293 1293 /// results of the status. This is needed to do so efficiently (i.e.
1294 1294 /// without cloning the `DirstateStatus` object with its paths) because
1295 1295 /// we need to borrow from `Self`.
1296 1296 pub fn with_status<R>(
1297 1297 &mut self,
1298 1298 matcher: &(dyn Matcher + Sync),
1299 1299 root_dir: PathBuf,
1300 1300 ignore_files: Vec<PathBuf>,
1301 1301 options: StatusOptions,
1302 1302 callback: impl for<'r> FnOnce(
1303 1303 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1304 1304 ) -> R,
1305 1305 ) -> R {
1306 1306 self.with_dmap_mut(|map| {
1307 1307 callback(super::status::status(
1308 1308 map,
1309 1309 matcher,
1310 1310 root_dir,
1311 1311 ignore_files,
1312 1312 options,
1313 1313 ))
1314 1314 })
1315 1315 }
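    // Hypothetical call site (identifiers invented for the sketch): the
    // callback borrows from the map, so anything needed afterwards has to
    // be extracted before it returns, e.g.
    //
    //     dmap.with_status(&matcher, root_dir, ignore_files, options, |res| {
    //         let (ds_status, _warnings) = res?;
    //         // ...copy whatever is needed out of `ds_status` here...
    //         Ok(())
    //     })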
1316 1316
1317 1317 pub fn copy_map_len(&self) -> usize {
1318 1318 let map = self.get_map();
1319 1319 map.nodes_with_copy_source_count as usize
1320 1320 }
1321 1321
1322 1322 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1323 1323 let map = self.get_map();
1324 1324 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1325 1325 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1326 1326 Some((node.full_path(map.on_disk)?, source))
1327 1327 } else {
1328 1328 None
1329 1329 })
1330 1330 }))
1331 1331 }
1332 1332
1333 1333 pub fn copy_map_contains_key(
1334 1334 &self,
1335 1335 key: &HgPath,
1336 1336 ) -> Result<bool, DirstateV2ParseError> {
1337 1337 let map = self.get_map();
1338 1338 Ok(if let Some(node) = map.get_node(key)? {
1339 1339 node.has_copy_source()
1340 1340 } else {
1341 1341 false
1342 1342 })
1343 1343 }
1344 1344
1345 1345 pub fn copy_map_get(
1346 1346 &self,
1347 1347 key: &HgPath,
1348 1348 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1349 1349 let map = self.get_map();
1350 1350 if let Some(node) = map.get_node(key)? {
1351 1351 if let Some(source) = node.copy_source(map.on_disk)? {
1352 1352 return Ok(Some(source));
1353 1353 }
1354 1354 }
1355 1355 Ok(None)
1356 1356 }
1357 1357
1358 1358 pub fn copy_map_remove(
1359 1359 &mut self,
1360 1360 key: &HgPath,
1361 1361 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1362 1362 self.with_dmap_mut(|map| {
1363 1363 let count = &mut map.nodes_with_copy_source_count;
1364 1364 let unreachable_bytes = &mut map.unreachable_bytes;
1365 1365 Ok(DirstateMap::get_node_mut_inner(
1366 1366 map.on_disk,
1367 1367 unreachable_bytes,
1368 1368 &mut map.root,
1369 1369 key,
1370 1370 |_ancestor| {},
1371 1371 )?
1372 1372 .and_then(|node| {
1373 1373 if let Some(source) = &node.copy_source {
1374 1374 *count = count
1375 1375 .checked_sub(1)
1376 1376 .expect("nodes_with_copy_source_count should be >= 0");
1377 1377 DirstateMap::count_dropped_path(
1378 1378 unreachable_bytes,
1379 1379 Cow::Borrowed(source),
1380 1380 );
1381 1381 }
1382 1382 node.copy_source.take().map(Cow::into_owned)
1383 1383 }))
1384 1384 })
1385 1385 }
1386 1386
1387 1387 pub fn copy_map_insert(
1388 1388 &mut self,
1389 1389 key: &HgPath,
1390 1390 value: &HgPath,
1391 1391 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1392 1392 self.with_dmap_mut(|map| {
1393 1393 let node = map.get_or_insert_node(key, |_ancestor| {})?;
 1394 1394 let had_no_copy_source = node.copy_source.is_none();
1395 1395 let old = node
1396 1396 .copy_source
1397 1397 .replace(value.to_owned().into())
1398 1398 .map(Cow::into_owned);
 1399 1399 if had_no_copy_source {
1400 1400 map.nodes_with_copy_source_count += 1
1401 1401 }
1402 1402 Ok(old)
1403 1403 })
1404 1404 }
1405 1405
1406 1406 pub fn len(&self) -> usize {
1407 1407 let map = self.get_map();
1408 1408 map.nodes_with_entry_count as usize
1409 1409 }
1410 1410
1411 1411 pub fn is_empty(&self) -> bool {
1412 1412 self.len() == 0
1413 1413 }
1414 1414
1415 1415 pub fn contains_key(
1416 1416 &self,
1417 1417 key: &HgPath,
1418 1418 ) -> Result<bool, DirstateV2ParseError> {
1419 1419 Ok(self.get(key)?.is_some())
1420 1420 }
1421 1421
1422 1422 pub fn get(
1423 1423 &self,
1424 1424 key: &HgPath,
1425 1425 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1426 1426 let map = self.get_map();
1427 1427 Ok(if let Some(node) = map.get_node(key)? {
1428 1428 node.entry()?
1429 1429 } else {
1430 1430 None
1431 1431 })
1432 1432 }
1433 1433
1434 1434 pub fn iter(&self) -> StateMapIter<'_> {
1435 1435 let map = self.get_map();
1436 1436 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1437 1437 Ok(if let Some(entry) = node.entry()? {
1438 1438 Some((node.full_path(map.on_disk)?, entry))
1439 1439 } else {
1440 1440 None
1441 1441 })
1442 1442 }))
1443 1443 }
1444 1444
1445 1445 pub fn iter_tracked_dirs(
1446 1446 &mut self,
1447 1447 ) -> Result<
1448 1448 Box<
1449 1449 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1450 1450 + Send
1451 1451 + '_,
1452 1452 >,
1453 1453 DirstateError,
1454 1454 > {
1455 1455 let map = self.get_map();
1456 1456 let on_disk = map.on_disk;
1457 1457 Ok(Box::new(filter_map_results(
1458 1458 map.iter_nodes(),
1459 1459 move |node| {
1460 1460 Ok(if node.tracked_descendants_count() > 0 {
1461 1461 Some(node.full_path(on_disk)?)
1462 1462 } else {
1463 1463 None
1464 1464 })
1465 1465 },
1466 1466 )))
1467 1467 }
1468 1468
1469 1469 /// Only public because it needs to be exposed to the Python layer.
1470 1470 /// It is not the full `setparents` logic, only the parts that mutate the
1471 1471 /// entries.
1472 1472 pub fn setparents_fixup(
1473 1473 &mut self,
1474 1474 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1475 1475 // XXX
1476 1476 // All the copying and re-querying is quite inefficient, but this is
1477 1477 // still a lot better than doing it from Python.
1478 1478 //
1479 1479 // The better solution is to develop a mechanism for `iter_mut`,
1480 1480 // which will be a lot more involved: we're dealing with a lazy,
1481 1481 // append-mostly, tree-like data structure. This will do for now.
1482 1482 let mut copies = vec![];
1483 1483 let mut files_with_p2_info = vec![];
1484 1484 for res in self.iter() {
1485 1485 let (path, entry) = res?;
1486 1486 if entry.p2_info() {
1487 1487 files_with_p2_info.push(path.to_owned())
1488 1488 }
1489 1489 }
1490 1490 self.with_dmap_mut(|map| {
1491 1491 for path in files_with_p2_info.iter() {
1492 1492 let node = map.get_or_insert_node(path, |_| {})?;
1493 1493 let entry =
1494 1494 node.data.as_entry_mut().expect("entry should exist");
1495 1495 entry.drop_merge_data();
1496 1496 if let Some(source) = node.copy_source.take().as_deref() {
1497 1497 copies.push((path.to_owned(), source.to_owned()));
1498 1498 }
1499 1499 }
1500 1500 Ok(copies)
1501 1501 })
1502 1502 }
1503 1503
1504 1504 pub fn debug_iter(
1505 1505 &self,
1506 1506 all: bool,
1507 1507 ) -> Box<
1508 1508 dyn Iterator<Item = Result<DebugDirstateTuple, DirstateV2ParseError>>
1509 1509 + Send
1510 1510 + '_,
1511 1511 > {
1512 1512 let map = self.get_map();
1513 1513 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1514 1514 let debug_tuple = if let Some(entry) = node.entry()? {
1515 1515 entry.debug_tuple()
1516 1516 } else if !all {
1517 1517 return Ok(None);
1518 1518 } else if let Some(mtime) = node.cached_directory_mtime()? {
1519 1519 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1520 1520 } else {
1521 1521 (b' ', 0, -1, -1)
1522 1522 };
1523 1523 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1524 1524 }))
1525 1525 }
1526 1526 }
1527 1527 #[cfg(test)]
1528 1528 mod tests {
1529 1529 use super::*;
1530 1530
1531 1531 /// Shortcut to return tracked descendants of a path.
1532 1532 /// Panics if the path does not exist.
1533 1533 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1534 1534 let path = dbg!(HgPath::new(path));
1535 1535 let node = map.get_map().get_node(path);
1536 1536 node.unwrap().unwrap().tracked_descendants_count()
1537 1537 }
1538 1538
1539 1539 /// Shortcut to return descendants with an entry.
1540 1540 /// Panics if the path does not exist.
1541 1541 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1542 1542 let path = dbg!(HgPath::new(path));
1543 1543 let node = map.get_map().get_node(path);
1544 1544 node.unwrap().unwrap().descendants_with_entry_count()
1545 1545 }
1546 1546
1547 1547 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1548 1548 let path = dbg!(HgPath::new(path));
1549 1549 let node = map.get_map().get_node(path);
1550 1550 assert!(node.unwrap().is_none());
1551 1551 }
1552 1552
1553 1553 /// Shortcut for path creation in tests
1554 1554 fn p(b: &[u8]) -> &HgPath {
1555 1555 HgPath::new(b)
1556 1556 }
1557 1557
 1558 1558 /// Test the very simple case of a single tracked file
1559 1559 #[test]
1560 1560 fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
1561 1561 let mut map = OwningDirstateMap::new_empty(vec![]);
1562 1562 assert_eq!(map.len(), 0);
1563 1563
1564 1564 map.set_tracked(p(b"some/nested/path"))?;
1565 1565
1566 1566 assert_eq!(map.len(), 1);
1567 1567 assert_eq!(tracked_descendants(&map, b"some"), 1);
1568 1568 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1569 1569 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1570 1570
1571 1571 map.set_untracked(p(b"some/nested/path"))?;
1572 1572 assert_eq!(map.len(), 0);
1573 1573 assert!(map.get_map().get_node(p(b"some"))?.is_none());
1574 1574
1575 1575 Ok(())
1576 1576 }
1577 1577
1578 1578 /// Test the simple case of all tracked, but multiple files
1579 1579 #[test]
1580 1580 fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
1581 1581 let mut map = OwningDirstateMap::new_empty(vec![]);
1582 1582
1583 1583 map.set_tracked(p(b"some/nested/path"))?;
1584 1584 map.set_tracked(p(b"some/nested/file"))?;
1585 1585 // one layer without any files to test deletion cascade
1586 1586 map.set_tracked(p(b"some/other/nested/path"))?;
1587 1587 map.set_tracked(p(b"root_file"))?;
1588 1588 map.set_tracked(p(b"some/file"))?;
1589 1589 map.set_tracked(p(b"some/file2"))?;
1590 1590 map.set_tracked(p(b"some/file3"))?;
1591 1591
1592 1592 assert_eq!(map.len(), 7);
1593 1593 assert_eq!(tracked_descendants(&map, b"some"), 6);
1594 1594 assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
1595 1595 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1596 1596 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1597 1597 assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);
1598 1598
1599 1599 map.set_untracked(p(b"some/nested/path"))?;
1600 1600 assert_eq!(map.len(), 6);
1601 1601 assert_eq!(tracked_descendants(&map, b"some"), 5);
1602 1602 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1603 1603 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1604 1604 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1605 1605
1606 1606 map.set_untracked(p(b"some/nested/file"))?;
1607 1607 assert_eq!(map.len(), 5);
1608 1608 assert_eq!(tracked_descendants(&map, b"some"), 4);
1609 1609 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1610 1610 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1611 1611 assert_does_not_exist(&map, b"some_nested");
1612 1612
1613 1613 map.set_untracked(p(b"some/other/nested/path"))?;
1614 1614 assert_eq!(map.len(), 4);
1615 1615 assert_eq!(tracked_descendants(&map, b"some"), 3);
1616 1616 assert_does_not_exist(&map, b"some/other");
1617 1617
1618 1618 map.set_untracked(p(b"root_file"))?;
1619 1619 assert_eq!(map.len(), 3);
1620 1620 assert_eq!(tracked_descendants(&map, b"some"), 3);
1621 1621 assert_does_not_exist(&map, b"root_file");
1622 1622
1623 1623 map.set_untracked(p(b"some/file"))?;
1624 1624 assert_eq!(map.len(), 2);
1625 1625 assert_eq!(tracked_descendants(&map, b"some"), 2);
1626 1626 assert_does_not_exist(&map, b"some/file");
1627 1627
1628 1628 map.set_untracked(p(b"some/file2"))?;
1629 1629 assert_eq!(map.len(), 1);
1630 1630 assert_eq!(tracked_descendants(&map, b"some"), 1);
1631 1631 assert_does_not_exist(&map, b"some/file2");
1632 1632
1633 1633 map.set_untracked(p(b"some/file3"))?;
1634 1634 assert_eq!(map.len(), 0);
1635 1635 assert_does_not_exist(&map, b"some/file3");
1636 1636
1637 1637 Ok(())
1638 1638 }
1639 1639
1640 1640 /// Check with a mix of tracked and non-tracked items
1641 1641 #[test]
1642 1642 fn test_tracked_descendants_different() -> Result<(), DirstateError> {
1643 1643 let mut map = OwningDirstateMap::new_empty(vec![]);
1644 1644
1645 1645 // A file that was just added
1646 1646 map.set_tracked(p(b"some/nested/path"))?;
1647 1647 // This has no information, the dirstate should ignore it
1648 1648 map.reset_state(p(b"some/file"), false, false, false, false, None)?;
1649 1649 assert_does_not_exist(&map, b"some/file");
1650 1650
1651 1651 // A file that was removed
1652 1652 map.reset_state(
1653 1653 p(b"some/nested/file"),
1654 1654 false,
1655 1655 true,
1656 1656 false,
1657 1657 false,
1658 1658 None,
1659 1659 )?;
1660 1660 assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
1661 1661 // Only present in p2
1662 1662 map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
1663 1663 assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
1664 1664 // A file that was merged
1665 1665 map.reset_state(p(b"root_file"), true, true, true, false, None)?;
1666 1666 assert!(map.get(p(b"root_file"))?.unwrap().tracked());
1667 1667 // A file that is added, with info from p2
1668 1668 // XXX is that actually possible?
1669 1669 map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
1670 1670 assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
1671 1671 // A clean file
1672 1672 // One layer without any files to test deletion cascade
1673 1673 map.reset_state(
1674 1674 p(b"some/other/nested/path"),
1675 1675 true,
1676 1676 true,
1677 1677 false,
1678 1678 false,
1679 1679 None,
1680 1680 )?;
1681 1681 assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());
1682 1682
1683 1683 assert_eq!(map.len(), 6);
1684 1684 assert_eq!(tracked_descendants(&map, b"some"), 3);
1685 1685 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1686 1686 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1687 1687 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1688 1688 assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
1689 1689 assert_eq!(
1690 1690 descendants_with_an_entry(&map, b"some/other/nested/path"),
1691 1691 0
1692 1692 );
1693 1693 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1694 1694 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1695 1695
1696 1696 // might as well check this
1697 1697 map.set_untracked(p(b"path/does/not/exist"))?;
1698 1698 assert_eq!(map.len(), 6);
1699 1699
1700 1700 map.set_untracked(p(b"some/other/nested/path"))?;
1701 1701 // It is set untracked but not deleted since it held other information
1702 1702 assert_eq!(map.len(), 6);
1703 1703 assert_eq!(tracked_descendants(&map, b"some"), 2);
1704 1704 assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
1705 1705 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1706 1706 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1707 1707 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1708 1708 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1709 1709
1710 1710 map.set_untracked(p(b"some/nested/path"))?;
1711 1711 // It is set untracked *and* deleted since it was only added
1712 1712 assert_eq!(map.len(), 5);
1713 1713 assert_eq!(tracked_descendants(&map, b"some"), 1);
1714 1714 assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
1715 1715 assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
1716 1716 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
1717 1717 assert_does_not_exist(&map, b"some/nested/path");
1718 1718
1719 1719 map.set_untracked(p(b"root_file"))?;
1720 1720 // Untracked but not deleted
1721 1721 assert_eq!(map.len(), 5);
1722 1722 assert!(map.get(p(b"root_file"))?.is_some());
1723 1723
1724 1724 map.set_untracked(p(b"some/file2"))?;
1725 1725 assert_eq!(map.len(), 5);
1726 1726 assert_eq!(tracked_descendants(&map, b"some"), 0);
1727 1727 assert!(map.get(p(b"some/file2"))?.is_some());
1728 1728
1729 1729 map.set_untracked(p(b"some/file3"))?;
1730 1730 assert_eq!(map.len(), 5);
1731 1731 assert_eq!(tracked_descendants(&map, b"some"), 0);
1732 1732 assert!(map.get(p(b"some/file3"))?.is_some());
1733 1733
1734 1734 Ok(())
1735 1735 }
1736 1736
1737 1737 /// Check that copies counter is correctly updated
1738 1738 #[test]
1739 1739 fn test_copy_source() -> Result<(), DirstateError> {
1740 1740 let mut map = OwningDirstateMap::new_empty(vec![]);
1741 1741
1742 1742 // Clean file
1743 1743 map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
1744 1744 // Merged file
1745 1745 map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
1746 1746 // Removed file
1747 1747 map.reset_state(p(b"removed"), false, true, false, false, None)?;
1748 1748 // Added file
1749 1749 map.reset_state(p(b"files/added"), true, false, false, false, None)?;
1750 1750 // Add copy
1751 1751 map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
1752 1752 assert_eq!(map.copy_map_len(), 1);
1753 1753
1754 1754 // Copy override
1755 1755 map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
1756 1756 assert_eq!(map.copy_map_len(), 1);
1757 1757
1758 1758 // Multiple copies
1759 1759 map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
1760 1760 assert_eq!(map.copy_map_len(), 2);
1761 1761
1762 1762 map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
1763 1763 assert_eq!(map.copy_map_len(), 3);
1764 1764
1765 1765 // Added, so the entry is completely removed
1766 1766 map.set_untracked(p(b"files/added"))?;
1767 1767 assert_does_not_exist(&map, b"files/added");
1768 1768 assert_eq!(map.copy_map_len(), 2);
1769 1769
1770 1770 // Removed, so the entry is kept around, so is its copy
1771 1771 map.set_untracked(p(b"removed"))?;
1772 1772 assert!(map.get(p(b"removed"))?.is_some());
1773 1773 assert_eq!(map.copy_map_len(), 2);
1774 1774
1775 1775 // Clean, so the entry is kept around, but not its copy
1776 1776 map.set_untracked(p(b"files/clean"))?;
1777 1777 assert!(map.get(p(b"files/clean"))?.is_some());
1778 1778 assert_eq!(map.copy_map_len(), 1);
1779 1779
1780 1780 map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
1781 1781 assert_eq!(map.copy_map_len(), 2);
1782 1782
1783 1783 // Info from p2, so its copy source info is kept around
1784 1784 map.set_untracked(p(b"files/from_p2"))?;
1785 1785 assert!(map.get(p(b"files/from_p2"))?.is_some());
1786 1786 assert_eq!(map.copy_map_len(), 2);
1787 1787
1788 1788 Ok(())
1789 1789 }
1790 1790
1791 1791 /// Test with "on disk" data. For the sake of this test, the "on disk" data
1792 1792 /// does not actually come from the disk, but it's opaque to the code being
1793 1793 /// tested.
1794 1794 #[test]
1795 1795 fn test_on_disk() -> Result<(), DirstateError> {
1796 1796 // First let's create some data to put "on disk"
1797 1797 let mut map = OwningDirstateMap::new_empty(vec![]);
1798 1798
1799 1799 // A file that was just added
1800 1800 map.set_tracked(p(b"some/nested/added"))?;
1801 1801 map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;
1802 1802
1803 1803 // A file that was removed
1804 1804 map.reset_state(
1805 1805 p(b"some/nested/removed"),
1806 1806 false,
1807 1807 true,
1808 1808 false,
1809 1809 false,
1810 1810 None,
1811 1811 )?;
1812 1812 // Only present in p2
1813 1813 map.reset_state(
1814 1814 p(b"other/p2_info_only"),
1815 1815 false,
1816 1816 false,
1817 1817 true,
1818 1818 false,
1819 1819 None,
1820 1820 )?;
1821 1821 map.copy_map_insert(
1822 1822 p(b"other/p2_info_only"),
1823 1823 p(b"other/p2_info_copy_source"),
1824 1824 )?;
1825 1825 // A file that was merged
1826 1826 map.reset_state(p(b"merged"), true, true, true, false, None)?;
1827 1827 // A file that is added, with info from p2
1828 1828 // XXX is that actually possible?
1829 1829 map.reset_state(
1830 1830 p(b"other/added_with_p2"),
1831 1831 true,
1832 1832 false,
1833 1833 true,
1834 1834 false,
1835 1835 None,
1836 1836 )?;
1837 1837 // One layer without any files to test deletion cascade
1838 1838 // A clean file
1839 1839 map.reset_state(
1840 1840 p(b"some/other/nested/clean"),
1841 1841 true,
1842 1842 true,
1843 1843 false,
1844 1844 false,
1845 1845 None,
1846 1846 )?;
1847 1847
1848 1848 let (packed, metadata, _should_append, _old_data_size) =
1849 1849 map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?;
1850 1850 let packed_len = packed.len();
1851 1851 assert!(packed_len > 0);
1852 1852
1853 1853 // Recreate "from disk"
1854 1854 let mut map = OwningDirstateMap::new_v2(
1855 1855 packed,
1856 1856 packed_len,
1857 1857 metadata.as_bytes(),
1858 1858 vec![],
1859 1859 None,
1860 1860 )?;
1861 1861
1862 1862 // Check that everything is accounted for
1863 1863 assert!(map.contains_key(p(b"some/nested/added"))?);
1864 1864 assert!(map.contains_key(p(b"some/nested/removed"))?);
1865 1865 assert!(map.contains_key(p(b"merged"))?);
1866 1866 assert!(map.contains_key(p(b"other/p2_info_only"))?);
1867 1867 assert!(map.contains_key(p(b"other/added_with_p2"))?);
1868 1868 assert!(map.contains_key(p(b"some/other/nested/clean"))?);
1869 1869 assert_eq!(
1870 1870 map.copy_map_get(p(b"some/nested/added"))?,
1871 1871 Some(p(b"added_copy_source"))
1872 1872 );
1873 1873 assert_eq!(
1874 1874 map.copy_map_get(p(b"other/p2_info_only"))?,
1875 1875 Some(p(b"other/p2_info_copy_source"))
1876 1876 );
1877 1877 assert_eq!(tracked_descendants(&map, b"some"), 2);
1878 1878 assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
1879 1879 assert_eq!(tracked_descendants(&map, b"other"), 1);
1880 1880 assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
1881 1881 assert_eq!(tracked_descendants(&map, b"some/other"), 1);
1882 1882 assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
1883 1883 assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
1884 1884 assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
1885 1885 assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
1886 1886 assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
1887 1887 assert_eq!(map.len(), 6);
1888 1888 assert_eq!(map.get_map().unreachable_bytes, 0);
1889 1889 assert_eq!(map.copy_map_len(), 2);
1890 1890
1891 1891 // Shouldn't change anything since it's already not tracked
1892 1892 map.set_untracked(p(b"some/nested/removed"))?;
1893 1893 assert_eq!(map.get_map().unreachable_bytes, 0);
1894 1894
1895 1895 if let ChildNodes::InMemory(_) = map.get_map().root {
1896 1896 panic!("root should not have been mutated")
1897 1897 }
1898 1898 // We haven't mutated enough (nothing, actually), so we should still be in
1899 1899 // the append strategy
1900 1900 assert!(map.get_map().write_should_append());
1901 1901
1902 1902 // But this mutates the structure, so there should be unreachable_bytes
1903 1903 assert!(map.set_untracked(p(b"some/nested/added"))?);
1904 1904 let unreachable_bytes = map.get_map().unreachable_bytes;
1905 1905 assert!(unreachable_bytes > 0);
1906 1906
1907 1907 if let ChildNodes::OnDisk(_) = map.get_map().root {
1908 1908 panic!("root should have been mutated")
1909 1909 }
1910 1910
1911 1911 // This should not mutate the structure either, since `root` has
1912 1912 // already been mutated along with its direct children.
1913 1913 map.set_untracked(p(b"merged"))?;
1914 1914 assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);
1915 1915
1916 1916 if let NodeRef::InMemory(_, _) =
1917 1917 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1918 1918 {
1919 1919 panic!("'other/added_with_p2' should not have been mutated")
1920 1920 }
1921 1921 // But this should, since it's in a different path
1922 1922 // than `<root>some/nested/added`
1923 1923 map.set_untracked(p(b"other/added_with_p2"))?;
1924 1924 assert!(map.get_map().unreachable_bytes > unreachable_bytes);
1925 1925
1926 1926 if let NodeRef::OnDisk(_) =
1927 1927 map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap()
1928 1928 {
1929 1929 panic!("'other/added_with_p2' should have been mutated")
1930 1930 }
1931 1931
1932 1932 // We have rewritten most of the tree, so we should create a new file
1933 1933 assert!(!map.get_map().write_should_append());
1934 1934
1935 1935 Ok(())
1936 1936 }
1937 1937 }
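// A hypothetical sketch (not the actual `hg-core` logic or threshold) of the
// append-versus-rewrite decision that the `write_should_append` assertions
// above exercise: keep appending to the existing dirstate-v2 data file while
// the share of unreachable bytes stays small, and rewrite the file once too
// much of it has been made unreachable by mutations.
fn should_append_sketch(unreachable_bytes: u32, total_bytes: u32) -> bool {
    const MAX_WASTE_RATIO: f64 = 0.5; // assumed ratio, for illustration only
    f64::from(unreachable_bytes) < MAX_WASTE_RATIO * f64::from(total_bytes)
}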
@@ -1,1055 +1,1055 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::{Matcher, VisitChildrenSet};
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_bytes_from_path;
14 14 use crate::utils::files::get_path_from_bytes;
15 15 use crate::utils::hg_path::hg_path_to_path_buf;
16 16 use crate::utils::hg_path::HgPath;
17 17 use crate::BadMatch;
18 18 use crate::BadType;
19 19 use crate::DirstateStatus;
20 20 use crate::HgPathCow;
21 21 use crate::PatternFileWarning;
22 22 use crate::StatusError;
23 23 use crate::StatusOptions;
24 24 use once_cell::sync::OnceCell;
25 25 use rayon::prelude::*;
26 26 use sha1::{Digest, Sha1};
27 27 use std::borrow::Cow;
28 28 use std::io;
29 29 use std::os::unix::prelude::FileTypeExt;
30 30 use std::path::Path;
31 31 use std::path::PathBuf;
32 32 use std::sync::Mutex;
33 33 use std::time::SystemTime;
34 34
35 35 /// Returns the status of the working directory compared to its parent
36 36 /// changeset.
37 37 ///
38 38 /// This algorithm is based on traversing the filesystem tree (`fs` in function
39 39 /// and variable names) and dirstate tree at the same time. The core of this
40 40 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
41 41 /// and its use of `itertools::merge_join_by`. When reaching a path that only
42 42 /// exists in one of the two trees, depending on information requested by
43 43 /// `options` we may need to traverse the remaining subtree.
44 44 #[logging_timer::time("trace")]
45 45 pub fn status<'dirstate>(
46 46 dmap: &'dirstate mut DirstateMap,
47 47 matcher: &(dyn Matcher + Sync),
48 48 root_dir: PathBuf,
49 49 ignore_files: Vec<PathBuf>,
50 50 options: StatusOptions,
51 51 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
52 52 {
53 53 // Also cap the number of rayon threads for a Python caller of this
54 54 // function, but don't complain if the global threadpool has already been
55 55 // set, since this code path is also used by `rhg`, which calls this early.
56 56 let _ = crate::utils::cap_default_rayon_threads();
57 57
58 58 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
59 59 if options.list_ignored || options.list_unknown {
60 60 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
61 61 DirstateVersion::V1 => {
62 62 let (ignore_fn, warnings) = get_ignore_function(
63 63 ignore_files,
64 64 &root_dir,
65 65 &mut |_source, _pattern_bytes| {},
66 66 )?;
67 67 (ignore_fn, warnings, None)
68 68 }
69 69 DirstateVersion::V2 => {
70 70 let mut hasher = Sha1::new();
71 71 let (ignore_fn, warnings) = get_ignore_function(
72 72 ignore_files,
73 73 &root_dir,
74 74 &mut |source, pattern_bytes| {
75 75 // If inside the repo, use the relative version to
76 76 // make it deterministic inside tests.
77 77 // The performance hit should be negligible.
78 78 let source = source
79 79 .strip_prefix(&root_dir)
80 80 .unwrap_or(source);
81 81 let source = get_bytes_from_path(source);
82 82
83 83 let mut subhasher = Sha1::new();
84 84 subhasher.update(pattern_bytes);
85 85 let patterns_hash = subhasher.finalize();
86 86
87 87 hasher.update(source);
88 88 hasher.update(b" ");
89 89 hasher.update(patterns_hash);
90 90 hasher.update(b"\n");
91 91 },
92 92 )?;
93 93 let new_hash = *hasher.finalize().as_ref();
94 94 let changed = new_hash != dmap.ignore_patterns_hash;
95 95 dmap.ignore_patterns_hash = new_hash;
96 96 (ignore_fn, warnings, Some(changed))
97 97 }
98 98 };
99 99 (ignore_fn, warnings, changed)
100 100 } else {
101 101 (Box::new(|&_| true), vec![], None)
102 102 };
103 103
104 104 let filesystem_time_at_status_start =
105 105 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
106 106
107 107 // If the repository is under the current directory, prefer using a
108 108 // relative path, so the kernel needs to traverse fewer directories in every
109 109 // call to `read_dir` or `symlink_metadata`.
110 110 // This is effective in the common case where the current directory is the
111 111 // repository root.
112 112
113 113 // TODO: Better yet would be to use libc functions like `openat` and
114 114 // `fstatat` to remove such repeated traversals entirely, but the standard
115 115 // library does not provide APIs based on those.
116 116 // Maybe with a crate like https://crates.io/crates/openat instead?
117 117 let root_dir = if let Some(relative) = std::env::current_dir()
118 118 .ok()
119 119 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
120 120 {
121 121 relative
122 122 } else {
123 123 &root_dir
124 124 };
125 125
126 126 let outcome = DirstateStatus {
127 127 filesystem_time_at_status_start,
128 128 ..Default::default()
129 129 };
130 130 let common = StatusCommon {
131 131 dmap,
132 132 options,
133 133 matcher,
134 134 ignore_fn,
135 135 outcome: Mutex::new(outcome),
136 136 ignore_patterns_have_changed: patterns_changed,
137 137 new_cacheable_directories: Default::default(),
138 138 outdated_cached_directories: Default::default(),
139 139 filesystem_time_at_status_start,
140 140 };
141 141 let is_at_repo_root = true;
142 142 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
143 143 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
144 144 let root_cached_mtime = None;
145 145 // If the path we have for the repository root is a symlink, do follow it.
146 146 // (As opposed to symlinks within the working directory which are not
147 147 // followed, using `std::fs::symlink_metadata`.)
148 148 common.traverse_fs_directory_and_dirstate(
149 149 &has_ignored_ancestor,
150 150 dmap.root.as_ref(),
151 151 hg_path,
152 152 &DirEntry {
153 153 hg_path: Cow::Borrowed(HgPath::new(b"")),
154 154 fs_path: Cow::Borrowed(root_dir),
155 155 symlink_metadata: None,
156 156 file_type: FakeFileType::Directory,
157 157 },
158 158 root_cached_mtime,
159 159 is_at_repo_root,
160 160 )?;
161 161 if let Some(file_set) = common.matcher.file_set() {
162 162 for file in file_set {
163 163 if !file.is_empty() && !dmap.has_node(file)? {
164 164 let path = hg_path_to_path_buf(file)?;
165 165 if let io::Result::Err(error) =
166 166 root_dir.join(path).symlink_metadata()
167 167 {
168 168 common.io_error(error, file)
169 169 }
170 170 }
171 171 }
172 172 }
173 173 let mut outcome = common.outcome.into_inner().unwrap();
174 174 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
175 175 let outdated = common.outdated_cached_directories.into_inner().unwrap();
176 176
177 177 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
178 178 || !outdated.is_empty()
179 179 || (!new_cacheable.is_empty()
180 180 && dmap.dirstate_version == DirstateVersion::V2);
181 181
182 182 // Remove outdated mtimes before adding new mtimes, in case a given
183 183 // directory is both
184 184 for path in &outdated {
185 185 dmap.clear_cached_mtime(path)?;
186 186 }
187 187 for (path, mtime) in &new_cacheable {
188 188 dmap.set_cached_mtime(path, *mtime)?;
189 189 }
190 190
191 191 Ok((outcome, warnings))
192 192 }
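// A self-contained sketch (illustrative file names, not from the repository)
// of the `itertools::merge_join_by` pairing described in the doc comment on
// `status()` above: two sorted sequences, dirstate names and filesystem
// names, are walked in lockstep, and every element comes out as
// dirstate-only, filesystem-only, or present in both.
fn merge_join_sketch() {
    use itertools::EitherOrBoth::{Both, Left, Right};

    let dirstate_names = ["a.rs", "b.rs", "d.rs"]; // sorted
    let fs_names = ["b.rs", "c.rs", "d.rs"]; // sorted
    for pair in itertools::merge_join_by(dirstate_names, fs_names, |d, f| d.cmp(f)) {
        match pair {
            Both(name, _) => println!("{name}: in dirstate and on disk"),
            Left(name) => println!("{name}: dirstate only (removed or deleted)"),
            Right(name) => println!("{name}: filesystem only (unknown or ignored)"),
        }
    }
}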
193 193
194 194 /// Bag of random things needed by various parts of the algorithm. Reduces the
195 195 /// number of parameters passed to functions.
196 196 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
197 197 dmap: &'tree DirstateMap<'on_disk>,
198 198 options: StatusOptions,
199 199 matcher: &'a (dyn Matcher + Sync),
200 200 ignore_fn: IgnoreFnType<'a>,
201 201 outcome: Mutex<DirstateStatus<'on_disk>>,
202 202 /// New timestamps of directories to be used for caching their readdirs
203 203 new_cacheable_directories:
204 204 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
205 205 /// Used to invalidate the readdir cache of directories
206 206 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
207 207
208 208 /// Whether ignore files like `.hgignore` have changed since the previous
209 209 /// time a `status()` call wrote their hash to the dirstate. `None` means
210 210 /// we don’t know as this run doesn’t list either ignored or uknown files
211 211 /// and therefore isn’t reading `.hgignore`.
212 212 ignore_patterns_have_changed: Option<bool>,
213 213
214 214 /// The current time at the start of the `status()` algorithm, as measured
215 215 /// and possibly truncated by the filesystem.
216 216 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
217 217 }
218 218
219 219 enum Outcome {
220 220 Modified,
221 221 Added,
222 222 Removed,
223 223 Deleted,
224 224 Clean,
225 225 Ignored,
226 226 Unknown,
227 227 Unsure,
228 228 }
229 229
230 230 /// Lazy computation of whether a given path has a hgignored
231 231 /// ancestor.
232 232 struct HasIgnoredAncestor<'a> {
233 233 /// `path` and `parent` constitute the inputs to the computation,
234 234 /// `cache` stores the outcome.
235 235 path: &'a HgPath,
236 236 parent: Option<&'a HasIgnoredAncestor<'a>>,
237 237 cache: OnceCell<bool>,
238 238 }
239 239
240 240 impl<'a> HasIgnoredAncestor<'a> {
241 241 fn create(
242 242 parent: Option<&'a HasIgnoredAncestor<'a>>,
243 243 path: &'a HgPath,
244 244 ) -> HasIgnoredAncestor<'a> {
245 245 Self {
246 246 path,
247 247 parent,
248 248 cache: OnceCell::new(),
249 249 }
250 250 }
251 251
252 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
252 fn force(&self, ignore_fn: &IgnoreFnType<'_>) -> bool {
253 253 match self.parent {
254 254 None => false,
255 255 Some(parent) => {
256 256 *(self.cache.get_or_init(|| {
257 257 parent.force(ignore_fn) || ignore_fn(self.path)
258 258 }))
259 259 }
260 260 }
261 261 }
262 262 }
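// A small illustration of the memoization pattern used by `force` above:
// `OnceCell::get_or_init` runs its closure on the first access only and
// returns the cached value on every later call, so the chain of ancestors is
// consulted at most once per directory.
fn once_cell_sketch() {
    let cache: OnceCell<bool> = OnceCell::new();
    let first = *cache.get_or_init(|| {
        // imagine an expensive `ignore_fn` lookup happening here
        true
    });
    let second = *cache.get_or_init(|| unreachable!("already computed"));
    assert_eq!(first, second);
}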
263 263
264 264 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
265 265 fn push_outcome(
266 266 &self,
267 267 which: Outcome,
268 268 dirstate_node: &NodeRef<'tree, 'on_disk>,
269 269 ) -> Result<(), DirstateV2ParseError> {
270 270 let path = dirstate_node
271 271 .full_path_borrowed(self.dmap.on_disk)?
272 272 .detach_from_tree();
273 273 let copy_source = if self.options.list_copies {
274 274 dirstate_node
275 275 .copy_source_borrowed(self.dmap.on_disk)?
276 276 .map(|source| source.detach_from_tree())
277 277 } else {
278 278 None
279 279 };
280 280 self.push_outcome_common(which, path, copy_source);
281 281 Ok(())
282 282 }
283 283
284 284 fn push_outcome_without_copy_source(
285 285 &self,
286 286 which: Outcome,
287 287 path: &BorrowedPath<'_, 'on_disk>,
288 288 ) {
289 289 self.push_outcome_common(which, path.detach_from_tree(), None)
290 290 }
291 291
292 292 fn push_outcome_common(
293 293 &self,
294 294 which: Outcome,
295 295 path: HgPathCow<'on_disk>,
296 296 copy_source: Option<HgPathCow<'on_disk>>,
297 297 ) {
298 298 let mut outcome = self.outcome.lock().unwrap();
299 299 let vec = match which {
300 300 Outcome::Modified => &mut outcome.modified,
301 301 Outcome::Added => &mut outcome.added,
302 302 Outcome::Removed => &mut outcome.removed,
303 303 Outcome::Deleted => &mut outcome.deleted,
304 304 Outcome::Clean => &mut outcome.clean,
305 305 Outcome::Ignored => &mut outcome.ignored,
306 306 Outcome::Unknown => &mut outcome.unknown,
307 307 Outcome::Unsure => &mut outcome.unsure,
308 308 };
309 309 vec.push(StatusPath { path, copy_source });
310 310 }
311 311
312 312 fn read_dir(
313 313 &self,
314 314 hg_path: &HgPath,
315 315 fs_path: &Path,
316 316 is_at_repo_root: bool,
317 317 ) -> Result<Vec<DirEntry>, ()> {
318 318 DirEntry::read_dir(fs_path, is_at_repo_root)
319 319 .map_err(|error| self.io_error(error, hg_path))
320 320 }
321 321
322 322 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
323 323 let errno = error.raw_os_error().expect("expected real OS error");
324 324 self.outcome
325 325 .lock()
326 326 .unwrap()
327 327 .bad
328 328 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
329 329 }
330 330
331 331 fn check_for_outdated_directory_cache(
332 332 &self,
333 333 dirstate_node: &NodeRef<'tree, 'on_disk>,
334 334 ) -> Result<bool, DirstateV2ParseError> {
335 335 if self.ignore_patterns_have_changed == Some(true)
336 336 && dirstate_node.cached_directory_mtime()?.is_some()
337 337 {
338 338 self.outdated_cached_directories.lock().unwrap().push(
339 339 dirstate_node
340 340 .full_path_borrowed(self.dmap.on_disk)?
341 341 .detach_from_tree(),
342 342 );
343 343 return Ok(true);
344 344 }
345 345 Ok(false)
346 346 }
347 347
348 348 /// If this returns true, we can get accurate results by only using
349 349 /// `symlink_metadata` for child nodes that exist in the dirstate, and we
350 350 /// don’t need to call `read_dir`.
351 351 fn can_skip_fs_readdir(
352 352 &self,
353 353 directory_entry: &DirEntry,
354 354 cached_directory_mtime: Option<TruncatedTimestamp>,
355 355 ) -> bool {
356 356 if !self.options.list_unknown && !self.options.list_ignored {
357 357 // All states that we care about listing have corresponding
358 358 // dirstate entries.
359 359 // This happens for example with `hg status -mard`.
360 360 return true;
361 361 }
362 362 if !self.options.list_ignored
363 363 && self.ignore_patterns_have_changed == Some(false)
364 364 {
365 365 if let Some(cached_mtime) = cached_directory_mtime {
366 366 // The dirstate contains a cached mtime for this directory, set
367 367 // by a previous run of the `status` algorithm which found this
368 368 // directory eligible for `read_dir` caching.
369 369 if let Ok(meta) = directory_entry.symlink_metadata() {
370 370 if cached_mtime
371 371 .likely_equal_to_mtime_of(&meta)
372 372 .unwrap_or(false)
373 373 {
374 374 // The mtime of that directory has not changed
375 375 // since then, which means that the results of
376 376 // `read_dir` should also be unchanged.
377 377 return true;
378 378 }
379 379 }
380 380 }
381 381 }
382 382 false
383 383 }
384 384
385 385 fn should_visit(set: &VisitChildrenSet, basename: &HgPath) -> bool {
386 386 match set {
387 387 VisitChildrenSet::This | VisitChildrenSet::Recursive => true,
388 388 VisitChildrenSet::Empty => false,
389 389 VisitChildrenSet::Set(children_to_visit) => {
390 390 children_to_visit.contains(basename)
391 391 }
392 392 }
393 393 }
394 394
395 395 /// Returns whether all child entries of the filesystem directory have a
396 396 /// corresponding dirstate node or are ignored.
397 397 fn traverse_fs_directory_and_dirstate<'ancestor>(
398 398 &self,
399 399 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
400 400 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
401 401 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
402 402 directory_entry: &DirEntry,
403 403 cached_directory_mtime: Option<TruncatedTimestamp>,
404 404 is_at_repo_root: bool,
405 405 ) -> Result<bool, DirstateV2ParseError> {
406 406 let children_set = self.matcher.visit_children_set(directory_hg_path);
407 407 if let VisitChildrenSet::Empty = children_set {
408 408 return Ok(false);
409 409 }
410 410 if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
411 411 dirstate_nodes
412 412 .par_iter()
413 413 .map(|dirstate_node| {
414 414 let fs_path = &directory_entry.fs_path;
415 415 let basename =
416 416 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes();
417 417 let fs_path = fs_path.join(get_path_from_bytes(basename));
418 418 if !Self::should_visit(
419 419 &children_set,
420 420 HgPath::new(basename),
421 421 ) {
422 422 return Ok(());
423 423 }
424 424 match std::fs::symlink_metadata(&fs_path) {
425 425 Ok(fs_metadata) => {
426 426 let file_type = fs_metadata.file_type().into();
427 427 let entry = DirEntry {
428 428 hg_path: Cow::Borrowed(
429 429 dirstate_node
430 430 .full_path(self.dmap.on_disk)?,
431 431 ),
432 432 fs_path: Cow::Borrowed(&fs_path),
433 433 symlink_metadata: Some(fs_metadata),
434 434 file_type,
435 435 };
436 436 self.traverse_fs_and_dirstate(
437 437 &entry,
438 438 dirstate_node,
439 439 has_ignored_ancestor,
440 440 )
441 441 }
442 442 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
443 443 self.traverse_dirstate_only(dirstate_node)
444 444 }
445 445 Err(error) => {
446 446 let hg_path =
447 447 dirstate_node.full_path(self.dmap.on_disk)?;
448 448 self.io_error(error, hg_path);
449 449 Ok(())
450 450 }
451 451 }
452 452 })
453 453 .collect::<Result<_, _>>()?;
454 454
455 455 // We don’t know, so conservatively say this isn’t the case
456 456 let children_all_have_dirstate_node_or_are_ignored = false;
457 457
458 458 return Ok(children_all_have_dirstate_node_or_are_ignored);
459 459 }
460 460
461 461 let readdir_succeeded;
462 462 let mut fs_entries = if let Ok(entries) = self.read_dir(
463 463 directory_hg_path,
464 464 &directory_entry.fs_path,
465 465 is_at_repo_root,
466 466 ) {
467 467 readdir_succeeded = true;
468 468 entries
469 469 } else {
470 470 // Treat an unreadable directory (typically because of insufficient
471 471 // permissions) like an empty directory. `self.read_dir` has
472 472 // already called `self.io_error` so a warning will be emitted.
473 473 // We still need to remember that there was an error so that we
474 474 // know not to cache this result.
475 475 readdir_succeeded = false;
476 476 Vec::new()
477 477 };
478 478
479 479 // `merge_join_by` requires both its input iterators to be sorted:
480 480
481 481 let dirstate_nodes = dirstate_nodes.sorted();
482 482 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
483 483 // https://github.com/rust-lang/rust/issues/34162
484 484 fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
485 485
486 486 // Propagate here any error that would happen inside the comparison
487 487 // callback below
488 488 for dirstate_node in &dirstate_nodes {
489 489 dirstate_node.base_name(self.dmap.on_disk)?;
490 490 }
491 491 itertools::merge_join_by(
492 492 dirstate_nodes,
493 493 &fs_entries,
494 494 |dirstate_node, fs_entry| {
495 495 // This `unwrap` never panics because we already propagated
496 496 // those errors above
497 497 dirstate_node
498 498 .base_name(self.dmap.on_disk)
499 499 .unwrap()
500 500 .cmp(&fs_entry.hg_path)
501 501 },
502 502 )
503 503 .par_bridge()
504 504 .map(|pair| {
505 505 use itertools::EitherOrBoth::*;
506 506 let basename = match &pair {
507 507 Left(dirstate_node) | Both(dirstate_node, _) => HgPath::new(
508 508 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
509 509 ),
510 510 Right(fs_entry) => &fs_entry.hg_path,
511 511 };
512 512 if !Self::should_visit(&children_set, basename) {
513 513 return Ok(false);
514 514 }
515 515 let has_dirstate_node_or_is_ignored = match pair {
516 516 Both(dirstate_node, fs_entry) => {
517 517 self.traverse_fs_and_dirstate(
518 518 fs_entry,
519 519 dirstate_node,
520 520 has_ignored_ancestor,
521 521 )?;
522 522 true
523 523 }
524 524 Left(dirstate_node) => {
525 525 self.traverse_dirstate_only(dirstate_node)?;
526 526 true
527 527 }
528 528 Right(fs_entry) => self.traverse_fs_only(
529 529 has_ignored_ancestor.force(&self.ignore_fn),
530 530 directory_hg_path,
531 531 fs_entry,
532 532 ),
533 533 };
534 534 Ok(has_dirstate_node_or_is_ignored)
535 535 })
536 536 .try_reduce(|| true, |a, b| Ok(a && b))
537 537 .map(|res| res && readdir_succeeded)
538 538 }
539 539
540 540 fn traverse_fs_and_dirstate<'ancestor>(
541 541 &self,
542 542 fs_entry: &DirEntry,
543 543 dirstate_node: NodeRef<'tree, 'on_disk>,
544 544 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
545 545 ) -> Result<(), DirstateV2ParseError> {
546 546 let outdated_dircache =
547 547 self.check_for_outdated_directory_cache(&dirstate_node)?;
548 548 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
549 549 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
550 550 if !file_or_symlink {
551 551 // If we previously had a file here, it was removed (with
552 552 // `hg rm` or similar) or deleted before it could be
553 553 // replaced by a directory or something else.
554 554 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
555 555 }
556 556 if let Some(bad_type) = fs_entry.is_bad() {
557 557 if self.matcher.exact_match(hg_path) {
558 558 let path = dirstate_node.full_path(self.dmap.on_disk)?;
559 559 self.outcome.lock().unwrap().bad.push((
560 560 path.to_owned().into(),
561 561 BadMatch::BadType(bad_type),
562 562 ))
563 563 }
564 564 }
565 565 if fs_entry.is_dir() {
566 566 if self.options.collect_traversed_dirs {
567 567 self.outcome
568 568 .lock()
569 569 .unwrap()
570 570 .traversed
571 571 .push(hg_path.detach_from_tree())
572 572 }
573 573 let is_ignored = HasIgnoredAncestor::create(
574 574 Some(has_ignored_ancestor),
575 575 hg_path,
576 576 );
577 577 let is_at_repo_root = false;
578 578 let children_all_have_dirstate_node_or_are_ignored = self
579 579 .traverse_fs_directory_and_dirstate(
580 580 &is_ignored,
581 581 dirstate_node.children(self.dmap.on_disk)?,
582 582 hg_path,
583 583 fs_entry,
584 584 dirstate_node.cached_directory_mtime()?,
585 585 is_at_repo_root,
586 586 )?;
587 587 self.maybe_save_directory_mtime(
588 588 children_all_have_dirstate_node_or_are_ignored,
589 589 fs_entry,
590 590 dirstate_node,
591 591 outdated_dircache,
592 592 )?
593 593 } else {
594 594 if file_or_symlink && self.matcher.matches(hg_path) {
595 595 if let Some(entry) = dirstate_node.entry()? {
596 596 if !entry.any_tracked() {
597 597 // Forward-compat if we start tracking unknown/ignored
598 598 // files for caching reasons
599 599 self.mark_unknown_or_ignored(
600 600 has_ignored_ancestor.force(&self.ignore_fn),
601 601 hg_path,
602 602 );
603 603 }
604 604 if entry.added() {
605 605 self.push_outcome(Outcome::Added, &dirstate_node)?;
606 606 } else if entry.removed() {
607 607 self.push_outcome(Outcome::Removed, &dirstate_node)?;
608 608 } else if entry.modified() {
609 609 self.push_outcome(Outcome::Modified, &dirstate_node)?;
610 610 } else {
611 611 self.handle_normal_file(&dirstate_node, fs_entry)?;
612 612 }
613 613 } else {
614 614 // `node.entry.is_none()` indicates a "directory"
615 615 // node, but the filesystem has a file
616 616 self.mark_unknown_or_ignored(
617 617 has_ignored_ancestor.force(&self.ignore_fn),
618 618 hg_path,
619 619 );
620 620 }
621 621 }
622 622
623 623 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
624 624 {
625 625 self.traverse_dirstate_only(child_node)?
626 626 }
627 627 }
628 628 Ok(())
629 629 }
630 630
631 631 /// Save directory mtime if applicable.
632 632 ///
633 633 /// `outdated_directory_cache` is `true` if we've just invalidated the
634 634 /// cache for this directory in `check_for_outdated_directory_cache`,
635 635 /// which forces the update.
636 636 fn maybe_save_directory_mtime(
637 637 &self,
638 638 children_all_have_dirstate_node_or_are_ignored: bool,
639 639 directory_entry: &DirEntry,
640 640 dirstate_node: NodeRef<'tree, 'on_disk>,
641 641 outdated_directory_cache: bool,
642 642 ) -> Result<(), DirstateV2ParseError> {
643 643 if !children_all_have_dirstate_node_or_are_ignored {
644 644 return Ok(());
645 645 }
646 646 // All filesystem directory entries from `read_dir` have a
647 647 // corresponding node in the dirstate, so we can reconstitute the
648 648 // names of those entries without calling `read_dir` again.
649 649
650 650 // TODO: use let-else here and below when available:
651 651 // https://github.com/rust-lang/rust/issues/87335
652 652 let status_start = if let Some(status_start) =
653 653 &self.filesystem_time_at_status_start
654 654 {
655 655 status_start
656 656 } else {
657 657 return Ok(());
658 658 };
659 659
660 660 // Although the Rust standard library’s `SystemTime` type
661 661 // has nanosecond precision, the times reported for a
662 662 // directory’s (or file’s) modified time may have lower
663 663 // resolution based on the filesystem (for example ext3
664 664 // only stores integer seconds), kernel (see
665 665 // https://stackoverflow.com/a/14393315/1162888), etc.
666 666 let metadata = match directory_entry.symlink_metadata() {
667 667 Ok(meta) => meta,
668 668 Err(_) => return Ok(()),
669 669 };
670 670
671 671 let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of(
672 672 &metadata,
673 673 status_start,
674 674 ) {
675 675 Ok(Some(directory_mtime)) => directory_mtime,
676 676 Ok(None) => {
677 677 // The directory was modified too recently,
678 678 // don’t cache its `read_dir` results.
679 679 //
680 680 // 1. A change to this directory (direct child was
681 681 // added or removed) causes its mtime to be set
682 682 // (possibly truncated) to `directory_mtime`
683 683 // 2. This `status` algorithm calls `read_dir`
684 684 // 3. Another change is made to the same directory,
685 685 // so that calling `read_dir` again would give
686 686 // different results, but soon enough after 1. that
687 687 // the mtime stays the same
688 688 //
689 689 // On a system where the time resolution is poor, this
690 690 // scenario is not unlikely if all three steps are caused
691 691 // by the same script.
692 692 return Ok(());
693 693 }
694 694 Err(_) => {
695 695 // OS/libc does not support mtime?
696 696 return Ok(());
697 697 }
698 698 };
699 699 // We’ve observed (through `status_start`) that time has
700 700 // “progressed” since `directory_mtime`, so any further
701 701 // change to this directory is extremely likely to cause a
702 702 // different mtime.
703 703 //
704 704 // Having the same mtime again is not entirely impossible
705 705 // since the system clock is not monotonic. It could jump
706 706 // backward to some point before `directory_mtime`, then a
707 707 // directory change could potentially happen during exactly
708 708 // the wrong tick.
709 709 //
710 710 // We deem this scenario (unlike the previous one) to be
711 711 // unlikely enough in practice.
712 712
713 713 let is_up_to_date = if let Some(cached) =
714 714 dirstate_node.cached_directory_mtime()?
715 715 {
716 716 !outdated_directory_cache && cached.likely_equal(directory_mtime)
717 717 } else {
718 718 false
719 719 };
720 720 if !is_up_to_date {
721 721 let hg_path = dirstate_node
722 722 .full_path_borrowed(self.dmap.on_disk)?
723 723 .detach_from_tree();
724 724 self.new_cacheable_directories
725 725 .lock()
726 726 .unwrap()
727 727 .push((hg_path, directory_mtime))
728 728 }
729 729 Ok(())
730 730 }
731 731
732 732 /// A file that is clean in the dirstate was found in the filesystem
733 733 fn handle_normal_file(
734 734 &self,
735 735 dirstate_node: &NodeRef<'tree, 'on_disk>,
736 736 fs_entry: &DirEntry,
737 737 ) -> Result<(), DirstateV2ParseError> {
738 738 // Keep the low 31 bits
739 739 fn truncate_u64(value: u64) -> i32 {
740 740 (value & 0x7FFF_FFFF) as i32
741 741 }
742 742
743 743 let fs_metadata = match fs_entry.symlink_metadata() {
744 744 Ok(meta) => meta,
745 745 Err(_) => return Ok(()),
746 746 };
747 747
748 748 let entry = dirstate_node
749 749 .entry()?
750 750 .expect("handle_normal_file called with entry-less node");
751 751 let mode_changed =
752 752 || self.options.check_exec && entry.mode_changed(&fs_metadata);
753 753 let size = entry.size();
754 754 let size_changed = size != truncate_u64(fs_metadata.len());
755 755 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
756 756 // issue6456: Size returned may be longer due to encryption
757 757 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
758 758 self.push_outcome(Outcome::Unsure, dirstate_node)?
759 759 } else if dirstate_node.has_copy_source()
760 760 || entry.is_from_other_parent()
761 761 || (size >= 0 && (size_changed || mode_changed()))
762 762 {
763 763 self.push_outcome(Outcome::Modified, dirstate_node)?
764 764 } else {
765 765 let mtime_looks_clean = if let Some(dirstate_mtime) =
766 766 entry.truncated_mtime()
767 767 {
768 768 let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
769 769 .expect("OS/libc does not support mtime?");
770 770 // There might be a change in the future if for example the
771 771 // internal clock drifts while the process runs, but this is a
772 772 // case where the issues the user would face
773 773 // would be a lot worse and there is nothing we
774 774 // can really do.
775 775 fs_mtime.likely_equal(dirstate_mtime)
776 776 } else {
777 777 // No mtime in the dirstate entry
778 778 false
779 779 };
780 780 if !mtime_looks_clean {
781 781 self.push_outcome(Outcome::Unsure, dirstate_node)?
782 782 } else if self.options.list_clean {
783 783 self.push_outcome(Outcome::Clean, dirstate_node)?
784 784 }
785 785 }
786 786 Ok(())
787 787 }
788 788
789 789 /// A node in the dirstate tree has no corresponding filesystem entry
790 790 fn traverse_dirstate_only(
791 791 &self,
792 792 dirstate_node: NodeRef<'tree, 'on_disk>,
793 793 ) -> Result<(), DirstateV2ParseError> {
794 794 self.check_for_outdated_directory_cache(&dirstate_node)?;
795 795 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
796 796 dirstate_node
797 797 .children(self.dmap.on_disk)?
798 798 .par_iter()
799 799 .map(|child_node| self.traverse_dirstate_only(child_node))
800 800 .collect()
801 801 }
802 802
803 803 /// A node in the dirstate tree has no corresponding *file* on the
804 804 /// filesystem
805 805 ///
806 806 /// Does nothing on a "directory" node
807 807 fn mark_removed_or_deleted_if_file(
808 808 &self,
809 809 dirstate_node: &NodeRef<'tree, 'on_disk>,
810 810 ) -> Result<(), DirstateV2ParseError> {
811 811 if let Some(entry) = dirstate_node.entry()? {
812 812 if !entry.any_tracked() {
813 813 // Future-compat for when we start storing ignored and unknown
814 814 // files for caching reasons
815 815 return Ok(());
816 816 }
817 817 let path = dirstate_node.full_path(self.dmap.on_disk)?;
818 818 if self.matcher.matches(path) {
819 819 if entry.removed() {
820 820 self.push_outcome(Outcome::Removed, dirstate_node)?
821 821 } else {
822 822 self.push_outcome(Outcome::Deleted, dirstate_node)?
823 823 }
824 824 }
825 825 }
826 826 Ok(())
827 827 }
828 828
829 829 /// Something in the filesystem has no corresponding dirstate node
830 830 ///
831 831 /// Returns whether that path is ignored
832 832 fn traverse_fs_only(
833 833 &self,
834 834 has_ignored_ancestor: bool,
835 835 directory_hg_path: &HgPath,
836 836 fs_entry: &DirEntry,
837 837 ) -> bool {
838 838 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
839 839 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
840 840 if fs_entry.is_dir() {
841 841 let is_ignored =
842 842 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
843 843 let traverse_children = if is_ignored {
844 844 // Descendants of an ignored directory are all ignored
845 845 self.options.list_ignored
846 846 } else {
847 847 // Descendants of an unknown directory may be either unknown or
848 848 // ignored
849 849 self.options.list_unknown || self.options.list_ignored
850 850 };
851 851 if traverse_children {
852 852 let is_at_repo_root = false;
853 853 if let Ok(children_fs_entries) =
854 854 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
855 855 {
856 856 children_fs_entries.par_iter().for_each(|child_fs_entry| {
857 857 self.traverse_fs_only(
858 858 is_ignored,
859 859 &hg_path,
860 860 child_fs_entry,
861 861 );
862 862 })
863 863 }
864 864 if self.options.collect_traversed_dirs {
865 865 self.outcome.lock().unwrap().traversed.push(hg_path.into())
866 866 }
867 867 }
868 868 is_ignored
869 869 } else if file_or_symlink {
870 870 if self.matcher.matches(&hg_path) {
871 871 self.mark_unknown_or_ignored(
872 872 has_ignored_ancestor,
873 873 &BorrowedPath::InMemory(&hg_path),
874 874 )
875 875 } else {
876 876 // We haven’t computed whether this path is ignored. It
877 877 // might not be, and a future run of status might have a
878 878 // different matcher that matches it. So treat it as not
879 879 // ignored. That is, inhibit readdir caching of the parent
880 880 // directory.
881 881 false
882 882 }
883 883 } else {
884 884 // This is neither a directory, a plain file, nor a symlink.
885 885 // Treat it like an ignored file.
886 886 true
887 887 }
888 888 }
889 889
890 890 /// Returns whether that path is ignored
891 891 fn mark_unknown_or_ignored(
892 892 &self,
893 893 has_ignored_ancestor: bool,
894 894 hg_path: &BorrowedPath<'_, 'on_disk>,
895 895 ) -> bool {
896 896 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
897 897 if is_ignored {
898 898 if self.options.list_ignored {
899 899 self.push_outcome_without_copy_source(
900 900 Outcome::Ignored,
901 901 hg_path,
902 902 )
903 903 }
904 904 } else if self.options.list_unknown {
905 905 self.push_outcome_without_copy_source(Outcome::Unknown, hg_path)
906 906 }
907 907 is_ignored
908 908 }
909 909 }
910 910
911 911 /// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
912 912 /// care about.
913 913 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
914 914 enum FakeFileType {
915 915 File,
916 916 Directory,
917 917 Symlink,
918 918 BadType(BadType),
919 919 }
920 920
921 921 impl From<std::fs::FileType> for FakeFileType {
922 922 fn from(f: std::fs::FileType) -> Self {
923 923 if f.is_dir() {
924 924 Self::Directory
925 925 } else if f.is_file() {
926 926 Self::File
927 927 } else if f.is_symlink() {
928 928 Self::Symlink
929 929 } else if f.is_fifo() {
930 930 Self::BadType(BadType::FIFO)
931 931 } else if f.is_block_device() {
932 932 Self::BadType(BadType::BlockDevice)
933 933 } else if f.is_char_device() {
934 934 Self::BadType(BadType::CharacterDevice)
935 935 } else if f.is_socket() {
936 936 Self::BadType(BadType::Socket)
937 937 } else {
938 938 Self::BadType(BadType::Unknown)
939 939 }
940 940 }
941 941 }
942 942
943 943 struct DirEntry<'a> {
944 944 /// Path as stored in the dirstate, or just the filename for optimization.
945 945 hg_path: HgPathCow<'a>,
946 946 /// Filesystem path
947 947 fs_path: Cow<'a, Path>,
948 948 /// Lazily computed
949 949 symlink_metadata: Option<std::fs::Metadata>,
950 950 /// Already computed for ergonomics.
951 951 file_type: FakeFileType,
952 952 }
953 953
954 954 impl<'a> DirEntry<'a> {
955 955 /// Returns **unsorted** entries in the given directory, with name,
956 956 /// metadata and file type.
957 957 ///
958 958 /// If a `.hg` sub-directory is encountered:
959 959 ///
960 960 /// * At the repository root, ignore that sub-directory
961 961 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
962 962 /// list instead.
963 963 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
964 964 // `read_dir` returns a "not found" error for the empty path
965 965 let at_cwd = path == Path::new("");
966 966 let read_dir_path = if at_cwd { Path::new(".") } else { path };
967 967 let mut results = Vec::new();
968 968 for entry in read_dir_path.read_dir()? {
969 969 let entry = entry?;
970 970 let file_type = match entry.file_type() {
971 971 Ok(v) => v,
972 972 Err(e) => {
973 973 // race with file deletion?
974 974 if e.kind() == std::io::ErrorKind::NotFound {
975 975 continue;
976 976 } else {
977 977 return Err(e);
978 978 }
979 979 }
980 980 };
981 981 let file_name = entry.file_name();
982 982 // FIXME don't do this when cached
983 983 if file_name == ".hg" {
984 984 if is_at_repo_root {
985 985 // Skip the repo’s own .hg (might be a symlink)
986 986 continue;
987 987 } else if file_type.is_dir() {
988 988 // A .hg sub-directory at another location means a subrepo,
989 989 // skip it entirely.
990 990 return Ok(Vec::new());
991 991 }
992 992 }
993 993 let full_path = if at_cwd {
994 994 file_name.clone().into()
995 995 } else {
996 996 entry.path()
997 997 };
998 998 let filename =
999 999 Cow::Owned(get_bytes_from_os_string(file_name).into());
1000 1000 let file_type = FakeFileType::from(file_type);
1001 1001 results.push(DirEntry {
1002 1002 hg_path: filename,
1003 1003 fs_path: Cow::Owned(full_path.to_path_buf()),
1004 1004 symlink_metadata: None,
1005 1005 file_type,
1006 1006 })
1007 1007 }
1008 1008 Ok(results)
1009 1009 }
1010 1010
1011 1011 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
1012 1012 match &self.symlink_metadata {
1013 1013 Some(meta) => Ok(meta.clone()),
1014 1014 None => std::fs::symlink_metadata(&self.fs_path),
1015 1015 }
1016 1016 }
1017 1017
1018 1018 fn is_dir(&self) -> bool {
1019 1019 self.file_type == FakeFileType::Directory
1020 1020 }
1021 1021
1022 1022 fn is_file(&self) -> bool {
1023 1023 self.file_type == FakeFileType::File
1024 1024 }
1025 1025
1026 1026 fn is_symlink(&self) -> bool {
1027 1027 self.file_type == FakeFileType::Symlink
1028 1028 }
1029 1029
1030 1030 fn is_bad(&self) -> Option<BadType> {
1031 1031 match self.file_type {
1032 1032 FakeFileType::BadType(ty) => Some(ty),
1033 1033 _ => None,
1034 1034 }
1035 1035 }
1036 1036 }
1037 1037
1038 1038 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
1039 1039 /// of the given repository.
1040 1040 ///
1041 1041 /// This is similar to `SystemTime::now()`, with the result truncated to the
1042 1042 /// same time resolution as other files’ modification times. Using `.hg`
1043 1043 /// instead of the system’s default temporary directory (such as `/tmp`) makes
1044 1044 /// it more likely the temporary file is in the same disk partition as contents
1045 1045 /// of the working directory, which can matter since different filesystems may
1046 1046 /// store timestamps with different resolutions.
1047 1047 ///
1048 1048 /// This may fail, typically if we lack write permissions. In that case we
1049 1049 /// should continue the `status()` algorithm anyway and consider the current
1050 1050 /// date/time to be unknown.
1051 1051 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
1052 1052 tempfile::tempfile_in(repo_root.join(".hg"))?
1053 1053 .metadata()?
1054 1054 .modified()
1055 1055 }
@@ -1,711 +1,711 b''
1 1 // discovery.rs
2 2 //
3 3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Discovery operations
9 9 //!
10 10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 11 //! `mercurial.setdiscovery`
12 12
13 13 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
15 15 use rand::seq::SliceRandom;
16 16 use rand::{thread_rng, RngCore, SeedableRng};
17 17 use std::cmp::{max, min};
18 18 use std::collections::{HashSet, VecDeque};
19 19
20 20 type Rng = rand_pcg::Pcg32;
21 21 type Seed = [u8; 16];
22 22
23 23 pub struct PartialDiscovery<G: Graph + Clone> {
24 24 target_heads: Option<Vec<Revision>>,
25 25 graph: G, // plays the role of self._repo
26 26 common: MissingAncestors<G>,
27 27 undecided: Option<HashSet<Revision>>,
28 28 children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
29 29 missing: HashSet<Revision>,
30 30 rng: Rng,
31 31 respect_size: bool,
32 32 randomize: bool,
33 33 }
34 34
35 35 pub struct DiscoveryStats {
36 36 pub undecided: Option<usize>,
37 37 }
38 38
39 39 /// Update an existing sample to match the expected size
40 40 ///
41 41 /// The sample is updated with revisions exponentially distant from each
42 42 /// element of `heads`.
43 43 ///
44 44 /// If a target size is specified, the sampling will stop once this size is
45 45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
46 46 /// reached.
47 47 ///
48 48 /// - `revs`: set of revs we want to discover (if `None`, assume the whole DAG
49 49 /// represented by `parentsfn`)
50 50 /// - `heads`: set of DAG head revs
51 51 /// - `sample`: a sample to update
52 52 /// - `parentsfn`: a callable to resolve parents for a revision
53 53 /// - `quicksamplesize`: optional target size of the sample
54 54 fn update_sample<I>(
55 55 revs: Option<&HashSet<Revision>>,
56 56 heads: impl IntoIterator<Item = Revision>,
57 57 sample: &mut HashSet<Revision>,
58 58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
59 59 quicksamplesize: Option<usize>,
60 60 ) -> Result<(), GraphError>
61 61 where
62 62 I: Iterator<Item = Revision>,
63 63 {
64 64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
65 65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
66 66 let mut factor: u32 = 1;
67 67 let mut seen: HashSet<Revision> = HashSet::new();
68 68 while let Some(current) = visit.pop_front() {
69 69 if !seen.insert(current) {
70 70 continue;
71 71 }
72 72
73 73 let d = *distances.entry(current).or_insert(1);
74 74 if d > factor {
75 75 factor *= 2;
76 76 }
77 77 if d == factor {
78 78 sample.insert(current);
79 79 if let Some(sz) = quicksamplesize {
80 80 if sample.len() >= sz {
81 81 return Ok(());
82 82 }
83 83 }
84 84 }
85 85 for p in parentsfn(current)? {
86 86 if let Some(revs) = revs {
87 87 if !revs.contains(&p) {
88 88 continue;
89 89 }
90 90 }
91 91 distances.entry(p).or_insert(d + 1);
92 92 visit.push_back(p);
93 93 }
94 94 }
95 95 Ok(())
96 96 }
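// A standalone sketch of the exponential-distance idea implemented by
// `update_sample`: walking back from a head, only revisions whose distance
// from that head is a power of two end up in the sample. On a linear chain
// `0..=head` (a simplification of a real DAG) that keeps the revisions at
// distances 1, 2, 4, 8, ... from the head.
fn exponential_sample_on_chain(head: u32) -> Vec<u32> {
    let mut sample = Vec::new();
    let mut distance = 1;
    while distance <= head + 1 {
        // On the chain, the revision at distance `d` from `head` is
        // `head - d + 1` (the head itself counts as distance 1).
        sample.push(head + 1 - distance);
        distance *= 2;
    }
    sample
}
// exponential_sample_on_chain(12) == vec![12, 11, 9, 5]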
97 97
98 98 struct ParentsIterator {
99 99 parents: [Revision; 2],
100 100 cur: usize,
101 101 }
102 102
103 103 impl ParentsIterator {
104 104 fn graph_parents(
105 105 graph: &impl Graph,
106 106 r: Revision,
107 107 ) -> Result<ParentsIterator, GraphError> {
108 108 Ok(ParentsIterator {
109 109 parents: graph.parents(r)?,
110 110 cur: 0,
111 111 })
112 112 }
113 113 }
114 114
115 115 impl Iterator for ParentsIterator {
116 116 type Item = Revision;
117 117
118 118 fn next(&mut self) -> Option<Revision> {
119 119 if self.cur > 1 {
120 120 return None;
121 121 }
122 122 let rev = self.parents[self.cur];
123 123 self.cur += 1;
124 124 if rev == NULL_REVISION {
125 125 return self.next();
126 126 }
127 127 Some(rev)
128 128 }
129 129 }
130 130
131 131 impl<G: Graph + Clone> PartialDiscovery<G> {
132 132 /// Create a PartialDiscovery object, with the intent
133 133 /// of comparing our `::<target_heads>` revset to the contents of another
134 134 /// repo.
135 135 ///
136 136 /// For now `target_heads` is passed as a vector, and will be used
137 137 /// at the first call to `ensure_undecided()`.
138 138 ///
139 139 /// If we want to make the signature more flexible,
140 140 /// we'll have to make it a type argument of `PartialDiscovery` or a trait
141 141 /// object, since we'll keep it around in the meantime
142 142 ///
143 143 /// The `respect_size` boolean controls how the sampling methods
144 144 /// will interpret the size argument requested by the caller. If it's
145 145 /// `false`, they are allowed to produce a sample whose size is more
146 146 /// appropriate to the situation (typically bigger).
147 147 ///
148 148 /// The `randomize` boolean affects sampling, and specifically how
149 149 /// limiting or last-minute expanding is being done:
150 150 ///
151 151 /// If `true`, both will perform random picking from `self.undecided`.
152 152 /// This is currently the best for actual discoveries.
153 153 ///
154 154 /// If `false`, a reproducible picking strategy is performed. This is
155 155 /// useful for integration tests.
156 156 pub fn new(
157 157 graph: G,
158 158 target_heads: Vec<Revision>,
159 159 respect_size: bool,
160 160 randomize: bool,
161 161 ) -> Self {
162 162 let mut seed = [0; 16];
163 163 if randomize {
164 164 thread_rng().fill_bytes(&mut seed);
165 165 }
166 166 Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
167 167 }
168 168
169 169 pub fn new_with_seed(
170 170 graph: G,
171 171 target_heads: Vec<Revision>,
172 172 seed: Seed,
173 173 respect_size: bool,
174 174 randomize: bool,
175 175 ) -> Self {
176 176 PartialDiscovery {
177 177 undecided: None,
178 178 children_cache: None,
179 179 target_heads: Some(target_heads),
180 180 graph: graph.clone(),
181 181 common: MissingAncestors::new(graph, vec![]),
182 182 missing: HashSet::new(),
183 183 rng: Rng::from_seed(seed),
184 184 respect_size,
185 185 randomize,
186 186 }
187 187 }
188 188
189 189 /// Extract at most `size` random elements from sample and return them
190 190 /// as a vector
191 191 fn limit_sample(
192 192 &mut self,
193 193 mut sample: Vec<Revision>,
194 194 size: usize,
195 195 ) -> Vec<Revision> {
196 196 if !self.randomize {
197 197 sample.sort_unstable();
198 198 sample.truncate(size);
199 199 return sample;
200 200 }
201 201 let sample_len = sample.len();
202 202 if sample_len <= size {
203 203 return sample;
204 204 }
205 205 let rng = &mut self.rng;
206 206 let dropped_size = sample_len - size;
207 207 let limited_slice = if size < dropped_size {
208 208 sample.partial_shuffle(rng, size).0
209 209 } else {
210 210 sample.partial_shuffle(rng, dropped_size).1
211 211 };
212 212 limited_slice.to_owned()
213 213 }
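// Usage sketch for `rand::seq::SliceRandom::partial_shuffle`, the primitive
// behind `limit_sample`: it randomly selects `amount` elements, moves them to
// the front of the slice, and returns `(randomly_picked, remainder)`.
// `limit_sample` above only shuffles whichever of the kept or dropped parts
// is smaller and reads its answer from the matching half of that tuple.
fn partial_shuffle_sketch() {
    use rand::seq::SliceRandom;
    use rand::SeedableRng;

    let mut rng = rand_pcg::Pcg32::from_seed([0; 16]);
    let mut revs: Vec<u32> = (0..20).collect();
    let (picked, remainder) = revs.partial_shuffle(&mut rng, 5);
    assert_eq!(picked.len(), 5);
    assert_eq!(remainder.len(), 15);
}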
214 214
215 215 /// Register revisions known as being common
216 216 pub fn add_common_revisions(
217 217 &mut self,
218 218 common: impl IntoIterator<Item = Revision>,
219 219 ) -> Result<(), GraphError> {
220 220 let before_len = self.common.get_bases().len();
221 221 self.common.add_bases(common);
222 222 if self.common.get_bases().len() == before_len {
223 223 return Ok(());
224 224 }
225 225 if let Some(ref mut undecided) = self.undecided {
226 226 self.common.remove_ancestors_from(undecided)?;
227 227 }
228 228 Ok(())
229 229 }
230 230
231 231 /// Register revisions known as being missing
232 232 ///
233 233 /// # Performance note
234 234 ///
235 235 /// Except in the most trivial case, the first call of this method has
236 236 /// the side effect of computing the `self.undecided` set for the first time,
237 237 /// and the related caches it might need for efficiency of its internal
238 238 /// computation. This is typically faster if more information is
239 239 /// available in `self.common`. Therefore, for good performance, the
240 240 /// caller should avoid calling this too early.
241 241 pub fn add_missing_revisions(
242 242 &mut self,
243 243 missing: impl IntoIterator<Item = Revision>,
244 244 ) -> Result<(), GraphError> {
245 245 let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
246 246 if tovisit.is_empty() {
247 247 return Ok(());
248 248 }
249 249 self.ensure_children_cache()?;
250 250 self.ensure_undecided()?; // for safety of possible future refactors
251 251 let children = self.children_cache.as_ref().unwrap();
252 252 let mut seen: HashSet<Revision> = HashSet::new();
253 253 let undecided_mut = self.undecided.as_mut().unwrap();
254 254 while let Some(rev) = tovisit.pop_front() {
255 255 if !self.missing.insert(rev) {
256 256 // either it's known to be missing from a previous
257 257 // invocation, and there's no need to iterate on its
258 258 // children (we know they are all missing)
259 259 // or it's from a previous iteration of this loop
260 260 // and its children have already been queued
261 261 continue;
262 262 }
263 263 undecided_mut.remove(&rev);
264 264 match children.get(&rev) {
265 265 None => {
266 266 continue;
267 267 }
268 268 Some(this_children) => {
269 269 for child in this_children.iter().cloned() {
270 270 if seen.insert(child) {
271 271 tovisit.push_back(child);
272 272 }
273 273 }
274 274 }
275 275 }
276 276 }
277 277 Ok(())
278 278 }
279 279
280 280 /// Do we have any information about the peer?
281 281 pub fn has_info(&self) -> bool {
282 282 self.common.has_bases()
283 283 }
284 284
285 285 /// Did we acquire full knowledge of which of our Revisions the peer has?
286 286 pub fn is_complete(&self) -> bool {
287 287 self.undecided.as_ref().map_or(false, HashSet::is_empty)
288 288 }
289 289
290 290 /// Return the heads of the currently known common set of revisions.
291 291 ///
292 292 /// If the discovery process is not complete (see `is_complete()`), the
293 293 /// caller must be aware that this is an intermediate state.
294 294 ///
295 295 /// On the other hand, if it is complete, then this is currently
296 296 /// the only way to retrieve the end results of the discovery process.
297 297 ///
298 298 /// We may introduce in the future an `into_common_heads` call that
299 299 /// would be more appropriate for normal Rust callers, dropping `self`
300 300 /// if it is complete.
301 301 pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
302 302 self.common.bases_heads()
303 303 }
304 304
305 305 /// Force first computation of `self.undecided`
306 306 ///
307 307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
308 308 /// unwrapped to get workable immutable or mutable references without
309 309 /// any panic.
310 310 ///
311 311 /// This is an imperative call instead of an access with added laziness
312 312 /// to easily reduce the scope of the mutable borrow for the caller,
313 313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
314 314 /// as long as the resulting immutable one.
315 315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
316 316 if self.undecided.is_some() {
317 317 return Ok(());
318 318 }
319 319 let tgt = self.target_heads.take().unwrap();
320 320 self.undecided =
321 321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
322 322 Ok(())
323 323 }
324 324
325 325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
326 326 if self.children_cache.is_some() {
327 327 return Ok(());
328 328 }
329 329 self.ensure_undecided()?;
330 330
331 331 let mut children: FastHashMap<Revision, Vec<Revision>> =
332 332 FastHashMap::default();
333 333 for &rev in self.undecided.as_ref().unwrap() {
334 334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
335 children.entry(p).or_insert_with(Vec::new).push(rev);
335 children.entry(p).or_default().push(rev);
336 336 }
337 337 }
338 338 self.children_cache = Some(children);
339 339 Ok(())
340 340 }
341 341
342 342 /// Provide statistics about the current state of the discovery process
343 343 pub fn stats(&self) -> DiscoveryStats {
344 344 DiscoveryStats {
345 345 undecided: self.undecided.as_ref().map(HashSet::len),
346 346 }
347 347 }
348 348
349 349 pub fn take_quick_sample(
350 350 &mut self,
351 351 headrevs: impl IntoIterator<Item = Revision>,
352 352 size: usize,
353 353 ) -> Result<Vec<Revision>, GraphError> {
354 354 self.ensure_undecided()?;
355 355 let mut sample = {
356 356 let undecided = self.undecided.as_ref().unwrap();
357 357 if undecided.len() <= size {
358 358 return Ok(undecided.iter().cloned().collect());
359 359 }
360 360 dagops::heads(&self.graph, undecided.iter())?
361 361 };
362 362 if sample.len() >= size {
363 363 return Ok(self.limit_sample(sample.into_iter().collect(), size));
364 364 }
365 365 update_sample(
366 366 None,
367 367 headrevs,
368 368 &mut sample,
369 369 |r| ParentsIterator::graph_parents(&self.graph, r),
370 370 Some(size),
371 371 )?;
372 372 Ok(sample.into_iter().collect())
373 373 }
374 374
375 375 /// Extract a sample from `self.undecided`, going from its heads and roots.
376 376 ///
377 377 /// The `size` parameter is used to avoid useless computations if
378 378 /// it turns out to be bigger than the whole set of undecided Revisions.
379 379 ///
380 380 /// The sample is taken by using `update_sample` from the heads, then
381 381 /// from the roots, working on the reverse DAG,
382 382 /// expressed by `self.children_cache`.
383 383 ///
384 384 /// No effort is being made to complete or limit the sample to `size`
385 385 /// but this method returns another interesting size that it derives
386 386 /// from its knowledge of the structure of the various sets, leaving
387 387 /// to the caller the decision to use it or not.
388 388 fn bidirectional_sample(
389 389 &mut self,
390 390 size: usize,
391 391 ) -> Result<(HashSet<Revision>, usize), GraphError> {
392 392 self.ensure_undecided()?;
393 393 {
394 394 // we don't want to compute children_cache before this
395 395 // but doing it after extracting self.undecided takes a mutable
396 396 // ref to self while a shareable one is still active.
397 397 let undecided = self.undecided.as_ref().unwrap();
398 398 if undecided.len() <= size {
399 399 return Ok((undecided.clone(), size));
400 400 }
401 401 }
402 402
403 403 self.ensure_children_cache()?;
404 404 let revs = self.undecided.as_ref().unwrap();
405 405 let mut sample: HashSet<Revision> = revs.clone();
406 406
407 407 // it's possible that leveraging the children cache would be more
408 408 // efficient here
409 409 dagops::retain_heads(&self.graph, &mut sample)?;
410 410 let revsheads = sample.clone(); // was again heads(revs) in python
411 411
412 412 // update from heads
413 413 update_sample(
414 414 Some(revs),
415 415 revsheads.iter().cloned(),
416 416 &mut sample,
417 417 |r| ParentsIterator::graph_parents(&self.graph, r),
418 418 None,
419 419 )?;
420 420
421 421 // update from roots
422 422 let revroots: HashSet<Revision> =
423 423 dagops::roots(&self.graph, revs)?.into_iter().collect();
424 424 let prescribed_size = max(size, min(revroots.len(), revsheads.len()));
425 425
426 426 let children = self.children_cache.as_ref().unwrap();
427 427 let empty_vec: Vec<Revision> = Vec::new();
428 428 update_sample(
429 429 Some(revs),
430 430 revroots,
431 431 &mut sample,
432 432 |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
433 433 None,
434 434 )?;
435 435 Ok((sample, prescribed_size))
436 436 }
437 437
438 438 /// Fill up the sample to the wished size with random undecided Revisions.
439 439 ///
440 440 /// This is intended to be used as a last resort completion if the
441 441 /// regular sampling algorithm returns too few elements.
442 442 fn random_complete_sample(
443 443 &mut self,
444 444 sample: &mut Vec<Revision>,
445 445 size: usize,
446 446 ) {
447 447 let sample_len = sample.len();
448 448 if size <= sample_len {
449 449 return;
450 450 }
451 451 let take_from: Vec<Revision> = self
452 452 .undecided
453 453 .as_ref()
454 454 .unwrap()
455 455 .iter()
456 456 .filter(|&r| !sample.contains(r))
457 457 .cloned()
458 458 .collect();
459 459 sample.extend(self.limit_sample(take_from, size - sample_len));
460 460 }
461 461
462 462 pub fn take_full_sample(
463 463 &mut self,
464 464 size: usize,
465 465 ) -> Result<Vec<Revision>, GraphError> {
466 466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
467 467 let size = if self.respect_size {
468 468 size
469 469 } else {
470 470 prescribed_size
471 471 };
472 472 let mut sample =
473 473 self.limit_sample(sample_set.into_iter().collect(), size);
474 474 self.random_complete_sample(&mut sample, size);
475 475 Ok(sample)
476 476 }
477 477 }
478 478
479 479 #[cfg(test)]
480 480 mod tests {
481 481 use super::*;
482 482 use crate::testing::SampleGraph;
483 483
484 484 /// Shorthand to reduce boilerplate when creating [`Revision`] for testing
485 485 macro_rules! R {
486 486 ($revision:literal) => {
487 487 Revision($revision)
488 488 };
489 489 }
490 490
491 491 /// A `PartialDiscovery` set up as if pushing all the heads of `SampleGraph`
492 492 ///
493 493 /// To avoid actual randomness in these tests, we give it a fixed
494 494 /// random seed, but by default we'll test the random version.
495 495 fn full_disco() -> PartialDiscovery<SampleGraph> {
496 496 PartialDiscovery::new_with_seed(
497 497 SampleGraph,
498 498 vec![R!(10), R!(11), R!(12), R!(13)],
499 499 [0; 16],
500 500 true,
501 501 true,
502 502 )
503 503 }
504 504
505 505 /// A `PartialDiscovery` set up as if pushing only head 12 of `SampleGraph`
506 506 ///
507 507 /// To avoid actual randomness in tests, we give it a fixed random seed.
508 508 fn disco12() -> PartialDiscovery<SampleGraph> {
509 509 PartialDiscovery::new_with_seed(
510 510 SampleGraph,
511 511 vec![R!(12)],
512 512 [0; 16],
513 513 true,
514 514 true,
515 515 )
516 516 }
517 517
518 518 fn sorted_undecided(
519 519 disco: &PartialDiscovery<SampleGraph>,
520 520 ) -> Vec<Revision> {
521 521 let mut as_vec: Vec<Revision> =
522 522 disco.undecided.as_ref().unwrap().iter().cloned().collect();
523 523 as_vec.sort_unstable();
524 524 as_vec
525 525 }
526 526
527 527 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
528 528 let mut as_vec: Vec<Revision> =
529 529 disco.missing.iter().cloned().collect();
530 530 as_vec.sort_unstable();
531 531 as_vec
532 532 }
533 533
534 534 fn sorted_common_heads(
535 535 disco: &PartialDiscovery<SampleGraph>,
536 536 ) -> Result<Vec<Revision>, GraphError> {
537 537 let mut as_vec: Vec<Revision> =
538 538 disco.common_heads()?.iter().cloned().collect();
539 539 as_vec.sort_unstable();
540 540 Ok(as_vec)
541 541 }
542 542
543 543 #[test]
544 544 fn test_add_common_get_undecided() -> Result<(), GraphError> {
545 545 let mut disco = full_disco();
546 546 assert_eq!(disco.undecided, None);
547 547 assert!(!disco.has_info());
548 548 assert_eq!(disco.stats().undecided, None);
549 549
550 550 disco.add_common_revisions(vec![R!(11), R!(12)])?;
551 551 assert!(disco.has_info());
552 552 assert!(!disco.is_complete());
553 553 assert!(disco.missing.is_empty());
554 554
555 555 // add_common_revisions did not trigger a premature computation
556 556 // of `undecided`, let's check that and ask for them
557 557 assert_eq!(disco.undecided, None);
558 558 disco.ensure_undecided()?;
559 559 assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
560 560 assert_eq!(disco.stats().undecided, Some(4));
561 561 Ok(())
562 562 }
563 563
564 564 /// in this test, we pretend that our peer misses exactly (8+10)::
565 565 /// and we're comparing all our repo to it (as in a bare push)
566 566 #[test]
567 567 fn test_discovery() -> Result<(), GraphError> {
568 568 let mut disco = full_disco();
569 569 disco.add_common_revisions(vec![R!(11), R!(12)])?;
570 570 disco.add_missing_revisions(vec![R!(8), R!(10)])?;
571 571 assert_eq!(sorted_undecided(&disco), vec![5]);
572 572 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
573 573 assert!(!disco.is_complete());
574 574
575 575 disco.add_common_revisions(vec![R!(5)])?;
576 576 assert_eq!(sorted_undecided(&disco), Vec::<Revision>::new());
577 577 assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
578 578 assert!(disco.is_complete());
579 579 assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
580 580 Ok(())
581 581 }
582 582
583 583 #[test]
584 584 fn test_add_missing_early_continue() -> Result<(), GraphError> {
585 585 eprintln!("test_add_missing_early_continue");
586 586 let mut disco = full_disco();
587 587 disco.add_common_revisions(vec![R!(13), R!(3), R!(4)])?;
588 588 disco.ensure_children_cache()?;
589 589 // 12 is grand-child of 6 through 9
590 590 // passing them in this order maximizes the chances of the
591 591 // early continue doing the wrong thing
592 592 disco.add_missing_revisions(vec![R!(6), R!(9), R!(12)])?;
593 593 assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
594 594 assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
595 595 assert!(!disco.is_complete());
596 596 Ok(())
597 597 }
598 598
599 599 #[test]
600 600 fn test_limit_sample_no_need_to() {
601 601 let sample = vec![R!(1), R!(2), R!(3), R!(4)];
602 602 assert_eq!(full_disco().limit_sample(sample, 10), vec![1, 2, 3, 4]);
603 603 }
604 604
605 605 #[test]
606 606 fn test_limit_sample_less_than_half() {
607 607 assert_eq!(
608 608 full_disco().limit_sample((1..6).map(Revision).collect(), 2),
609 609 vec![2, 5]
610 610 );
611 611 }
612 612
613 613 #[test]
614 614 fn test_limit_sample_more_than_half() {
615 615 assert_eq!(
616 616 full_disco().limit_sample((1..4).map(Revision).collect(), 2),
617 617 vec![1, 2]
618 618 );
619 619 }
620 620
621 621 #[test]
622 622 fn test_limit_sample_no_random() {
623 623 let mut disco = full_disco();
624 624 disco.randomize = false;
625 625 assert_eq!(
626 626 disco.limit_sample(
627 627 vec![R!(1), R!(8), R!(13), R!(5), R!(7), R!(3)],
628 628 4
629 629 ),
630 630 vec![1, 3, 5, 7]
631 631 );
632 632 }
633 633
634 634 #[test]
635 635 fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
636 636 let mut disco = full_disco();
637 637 disco.undecided = Some((1..=13).map(Revision).collect());
638 638
639 639 let mut sample_vec = disco.take_quick_sample(vec![], 4)?;
640 640 sample_vec.sort_unstable();
641 641 assert_eq!(sample_vec, vec![10, 11, 12, 13]);
642 642 Ok(())
643 643 }
644 644
645 645 #[test]
646 646 fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
647 647 let mut disco = disco12();
648 648 disco.ensure_undecided()?;
649 649
650 650 let mut sample_vec = disco.take_quick_sample(vec![R!(12)], 4)?;
651 651 sample_vec.sort_unstable();
652 652 // r12's only parent is r9, whose unique grand-parent through the
653 653 // diamond shape is r4. This ends there because the distance from r4
654 654 // to the root is only 3.
655 655 assert_eq!(sample_vec, vec![4, 9, 12]);
656 656 Ok(())
657 657 }
658 658
659 659 #[test]
660 660 fn test_children_cache() -> Result<(), GraphError> {
661 661 let mut disco = full_disco();
662 662 disco.ensure_children_cache()?;
663 663
664 664 let cache = disco.children_cache.unwrap();
665 665 assert_eq!(cache.get(&R!(2)).cloned(), Some(vec![R!(4)]));
666 666 assert_eq!(cache.get(&R!(10)).cloned(), None);
667 667
668 668 let mut children_4 = cache.get(&R!(4)).cloned().unwrap();
669 669 children_4.sort_unstable();
670 670 assert_eq!(children_4, vec![R!(5), R!(6), R!(7)]);
671 671
672 672 let mut children_7 = cache.get(&R!(7)).cloned().unwrap();
673 673 children_7.sort_unstable();
674 674 assert_eq!(children_7, vec![R!(9), R!(11)]);
675 675
676 676 Ok(())
677 677 }
678 678
679 679 #[test]
680 680 fn test_complete_sample() {
681 681 let mut disco = full_disco();
682 682 let undecided: HashSet<Revision> =
683 683 [4, 7, 9, 2, 3].iter().cloned().map(Revision).collect();
684 684 disco.undecided = Some(undecided);
685 685
686 686 let mut sample = vec![R!(0)];
687 687 disco.random_complete_sample(&mut sample, 3);
688 688 assert_eq!(sample.len(), 3);
689 689
690 690 let mut sample = vec![R!(2), R!(4), R!(7)];
691 691 disco.random_complete_sample(&mut sample, 1);
692 692 assert_eq!(sample.len(), 3);
693 693 }
694 694
695 695 #[test]
696 696 fn test_bidirectional_sample() -> Result<(), GraphError> {
697 697 let mut disco = full_disco();
698 disco.undecided = Some((0..=13).into_iter().map(Revision).collect());
698 disco.undecided = Some((0..=13).map(Revision).collect());
699 699
700 700 let (sample_set, size) = disco.bidirectional_sample(7)?;
701 701 assert_eq!(size, 7);
702 702 let mut sample: Vec<Revision> = sample_set.into_iter().collect();
703 703 sample.sort_unstable();
704 704 // our DAG is a bit too small for the results to be really interesting,
705 705 // but at least it shows that:
706 706 // - we went both ways
707 707 // - we didn't take all Revisions (6 is not in the sample)
708 708 assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
709 709 Ok(())
710 710 }
711 711 }
@@ -1,876 +1,874 b''
1 1 // filepatterns.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Handling of Mercurial-specific patterns.
9 9
10 10 use crate::{
11 11 utils::{
12 12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 14 SliceExt,
15 15 },
16 16 FastHashMap, PatternError,
17 17 };
18 18 use lazy_static::lazy_static;
19 19 use regex::bytes::{NoExpand, Regex};
20 20 use std::ops::Deref;
21 21 use std::path::{Path, PathBuf};
22 22 use std::vec::Vec;
23 23
24 24 lazy_static! {
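/// Byte-indexed escape table: `RE_ESCAPE[b]` is byte `b`, prefixed with a
/// `\` when `b` is one of the regex special characters listed in `to_escape`.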
25 25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
26 26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
27 27 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c";
28 28 for byte in to_escape {
29 29 v[*byte as usize].insert(0, b'\\');
30 30 }
31 31 v
32 32 };
33 33 }
34 34
35 35 /// These are matched in order
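/// by `glob_to_re` once a leading `*` has been consumed: `**/` becomes
/// `(?:.*/)?`, `**` becomes `.*`, and a lone `*` becomes `[^/]*`.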
36 36 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
37 37 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
38 38
39 39 #[derive(Debug, Clone, PartialEq, Eq)]
40 40 pub enum PatternSyntax {
41 41 /// A regular expression
42 42 Regexp,
43 43 /// Glob that matches at the front of the path
44 44 RootGlob,
45 45 /// Glob that matches at any suffix of the path (still anchored at
46 46 /// slashes)
47 47 Glob,
48 48 /// A path relative to repository root, which is matched recursively
49 49 Path,
50 50 /// A single exact path relative to repository root
51 51 FilePath,
52 52 /// A path relative to cwd
53 53 RelPath,
54 54 /// An unrooted glob (`*.rs` matches Rust files in all dirs)
55 55 RelGlob,
56 56 /// A regexp that needn't match the start of a name
57 57 RelRegexp,
58 58 /// A path relative to repository root, which is matched non-recursively
59 59 /// (will not match subdirectories)
60 60 RootFiles,
61 61 /// A file of patterns to read and include
62 62 Include,
63 63 /// A file of patterns to match against files under the same directory
64 64 SubInclude,
65 65 /// SubInclude with the result of parsing the included file
66 66 ///
67 67 /// Note: there is no ExpandedInclude because that expansion can be done
68 68 /// in place by replacing the Include pattern by the included patterns.
69 69 /// SubInclude requires more handling.
70 70 ///
71 71 /// Note: `Box` is used to minimize size impact on other enum variants
72 72 ExpandedSubInclude(Box<SubInclude>),
73 73 }
74 74
75 75 /// Transforms a glob pattern into a regex
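/// (for example `?` becomes `.`, `**` becomes `.*` and `{a,b}` becomes
/// `(?:a|b)`; see `glob_test` below for more)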
76 76 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
77 77 let mut input = pat;
78 78 let mut res: Vec<u8> = vec![];
79 79 let mut group_depth = 0;
80 80
81 81 while let Some((c, rest)) = input.split_first() {
82 82 input = rest;
83 83
84 84 match c {
85 85 b'*' => {
86 86 for (source, repl) in GLOB_REPLACEMENTS {
87 87 if let Some(rest) = input.drop_prefix(source) {
88 88 input = rest;
89 89 res.extend(*repl);
90 90 break;
91 91 }
92 92 }
93 93 }
94 94 b'?' => res.extend(b"."),
95 95 b'[' => {
96 96 match input.iter().skip(1).position(|b| *b == b']') {
97 97 None => res.extend(b"\\["),
98 98 Some(end) => {
99 99 // Account for the one we skipped
100 100 let end = end + 1;
101 101
102 102 res.extend(b"[");
103 103
104 104 for (i, b) in input[..end].iter().enumerate() {
105 105 if *b == b'!' && i == 0 {
106 106 res.extend(b"^")
107 107 } else if *b == b'^' && i == 0 {
108 108 res.extend(b"\\^")
109 109 } else if *b == b'\\' {
110 110 res.extend(b"\\\\")
111 111 } else {
112 112 res.push(*b)
113 113 }
114 114 }
115 115 res.extend(b"]");
116 116 input = &input[end + 1..];
117 117 }
118 118 }
119 119 }
120 120 b'{' => {
121 121 group_depth += 1;
122 122 res.extend(b"(?:")
123 123 }
124 124 b'}' if group_depth > 0 => {
125 125 group_depth -= 1;
126 126 res.extend(b")");
127 127 }
128 128 b',' if group_depth > 0 => res.extend(b"|"),
129 129 b'\\' => {
130 130 let c = {
131 131 if let Some((c, rest)) = input.split_first() {
132 132 input = rest;
133 133 c
134 134 } else {
135 135 c
136 136 }
137 137 };
138 138 res.extend(&RE_ESCAPE[*c as usize])
139 139 }
140 140 _ => res.extend(&RE_ESCAPE[*c as usize]),
141 141 }
142 142 }
143 143 res
144 144 }
145 145
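/// Escape every regex special character in `pattern` (using `RE_ESCAPE`).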
146 146 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
147 147 pattern
148 148 .iter()
149 149 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
150 150 .collect()
151 151 }
152 152
153 153 pub fn parse_pattern_syntax(
154 154 kind: &[u8],
155 155 ) -> Result<PatternSyntax, PatternError> {
156 156 match kind {
157 157 b"re:" => Ok(PatternSyntax::Regexp),
158 158 b"path:" => Ok(PatternSyntax::Path),
159 159 b"filepath:" => Ok(PatternSyntax::FilePath),
160 160 b"relpath:" => Ok(PatternSyntax::RelPath),
161 161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
162 162 b"relglob:" => Ok(PatternSyntax::RelGlob),
163 163 b"relre:" => Ok(PatternSyntax::RelRegexp),
164 164 b"glob:" => Ok(PatternSyntax::Glob),
165 165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
166 166 b"include:" => Ok(PatternSyntax::Include),
167 167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
168 168 _ => Err(PatternError::UnsupportedSyntax(
169 169 String::from_utf8_lossy(kind).to_string(),
170 170 )),
171 171 }
172 172 }
173 173
174 174 lazy_static! {
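/// Matches an inline flags group such as `(?i)` or `(?ia)` at the start of
/// a regexp pattern, as used by the `RelRegexp` handling in
/// `_build_single_regex` below.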
175 175 static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
176 176 }
177 177
178 178 /// Builds the regex that corresponds to the given pattern.
179 179 /// If within a `syntax: regexp` context, returns the pattern,
180 180 /// otherwise, returns the corresponding regex.
181 181 fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
182 182 let IgnorePattern {
183 183 syntax, pattern, ..
184 184 } = entry;
185 185 if pattern.is_empty() {
186 186 return vec![];
187 187 }
188 188 match syntax {
189 189 PatternSyntax::Regexp => pattern.to_owned(),
190 190 PatternSyntax::RelRegexp => {
191 191 // The `regex` crate accepts `**` while `re2` and Python's `re`
192 192 // do not. Checking for `*` correctly triggers the same error in
193 193 // all engines.
194 194 if pattern[0] == b'^'
195 195 || pattern[0] == b'*'
196 196 || pattern.starts_with(b".*")
197 197 {
198 198 return pattern.to_owned();
199 199 }
200 200 match FLAG_RE.find(pattern) {
201 201 Some(mat) => {
202 202 let s = mat.start();
203 203 let e = mat.end();
204 204 [
205 205 &b"(?"[..],
206 206 &pattern[s + 2..e - 1],
207 207 &b":"[..],
208 208 if pattern[e] == b'^'
209 209 || pattern[e] == b'*'
210 210 || pattern[e..].starts_with(b".*")
211 211 {
212 212 &b""[..]
213 213 } else {
214 214 &b".*"[..]
215 215 },
216 216 &pattern[e..],
217 217 &b")"[..],
218 218 ]
219 219 .concat()
220 220 }
221 221 None => [&b".*"[..], pattern].concat(),
222 222 }
223 223 }
224 224 PatternSyntax::Path | PatternSyntax::RelPath => {
225 225 if pattern == b"." {
226 226 return vec![];
227 227 }
228 228 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
229 229 }
230 230 PatternSyntax::RootFiles => {
231 231 let mut res = if pattern == b"." {
232 232 vec![]
233 233 } else {
234 234 // Pattern is a directory name.
235 235 [escape_pattern(pattern).as_slice(), b"/"].concat()
236 236 };
237 237
238 238 // Anything after the pattern must be a non-directory.
239 239 res.extend(b"[^/]+$");
240 240 res
241 241 }
242 242 PatternSyntax::RelGlob => {
243 243 let glob_re = glob_to_re(pattern);
244 244 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
245 245 [b".*", rest, glob_suffix].concat()
246 246 } else {
247 247 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
248 248 }
249 249 }
250 250 PatternSyntax::Glob | PatternSyntax::RootGlob => {
251 251 [glob_to_re(pattern).as_slice(), glob_suffix].concat()
252 252 }
253 253 PatternSyntax::Include
254 254 | PatternSyntax::SubInclude
255 255 | PatternSyntax::ExpandedSubInclude(_)
256 256 | PatternSyntax::FilePath => unreachable!(),
257 257 }
258 258 }
259 259
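/// Characters that make a glob pattern non-trivial: a `rootglob:` pattern
/// containing none of these can be matched as a literal path
/// (see `build_single_regex` below).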
260 260 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
261 261 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
262 262
263 263 /// TODO support other platforms
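/// (Illustrative: `a//./b/../c` normalizes to `a/c`, `///a` to `/a`,
/// and an empty path to `.`.)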
264 264 #[cfg(unix)]
265 265 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
266 266 if bytes.is_empty() {
267 267 return b".".to_vec();
268 268 }
269 269 let sep = b'/';
270 270
271 271 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
272 272 if initial_slashes > 2 {
273 273 // POSIX allows one or two initial slashes, but treats three or more
274 274 // as a single slash.
275 275 initial_slashes = 1;
276 276 }
277 277 let components = bytes
278 278 .split(|b| *b == sep)
279 279 .filter(|c| !(c.is_empty() || c == b"."))
280 280 .fold(vec![], |mut acc, component| {
281 281 if component != b".."
282 282 || (initial_slashes == 0 && acc.is_empty())
283 283 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
284 284 {
285 285 acc.push(component)
286 286 } else if !acc.is_empty() {
287 287 acc.pop();
288 288 }
289 289 acc
290 290 });
291 291 let mut new_bytes = components.join(&sep);
292 292
293 293 if initial_slashes > 0 {
294 294 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
295 295 buf.extend(new_bytes);
296 296 new_bytes = buf;
297 297 }
298 298 if new_bytes.is_empty() {
299 299 b".".to_vec()
300 300 } else {
301 301 new_bytes
302 302 }
303 303 }
304 304
305 305 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
306 306 /// that don't need to be transformed into a regex.
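/// For instance `rootglob:` patterns without any glob special character and
/// `filepath:` patterns yield `Ok(None)`; see
/// `test_build_single_regex_shortcut` below.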
307 307 pub fn build_single_regex(
308 308 entry: &IgnorePattern,
309 309 glob_suffix: &[u8],
310 310 ) -> Result<Option<Vec<u8>>, PatternError> {
311 311 let IgnorePattern {
312 312 pattern, syntax, ..
313 313 } = entry;
314 314 let pattern = match syntax {
315 315 PatternSyntax::RootGlob
316 316 | PatternSyntax::Path
317 317 | PatternSyntax::RelGlob
318 318 | PatternSyntax::RelPath
319 319 | PatternSyntax::RootFiles => normalize_path_bytes(pattern),
320 320 PatternSyntax::Include | PatternSyntax::SubInclude => {
321 321 return Err(PatternError::NonRegexPattern(entry.clone()))
322 322 }
323 323 _ => pattern.to_owned(),
324 324 };
325 325 let is_simple_rootglob = *syntax == PatternSyntax::RootGlob
326 326 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
327 327 if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
328 328 Ok(None)
329 329 } else {
330 330 let mut entry = entry.clone();
331 331 entry.pattern = pattern;
332 332 Ok(Some(_build_single_regex(&entry, glob_suffix)))
333 333 }
334 334 }
335 335
336 336 lazy_static! {
337 337 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = {
338 338 let mut m = FastHashMap::default();
339 339
340 340 m.insert(b"re:".as_ref(), PatternSyntax::Regexp);
341 341 m.insert(b"regexp:".as_ref(), PatternSyntax::Regexp);
342 342 m.insert(b"path:".as_ref(), PatternSyntax::Path);
343 343 m.insert(b"filepath:".as_ref(), PatternSyntax::FilePath);
344 344 m.insert(b"relpath:".as_ref(), PatternSyntax::RelPath);
345 345 m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFiles);
346 346 m.insert(b"relglob:".as_ref(), PatternSyntax::RelGlob);
347 347 m.insert(b"relre:".as_ref(), PatternSyntax::RelRegexp);
348 348 m.insert(b"glob:".as_ref(), PatternSyntax::Glob);
349 349 m.insert(b"rootglob:".as_ref(), PatternSyntax::RootGlob);
350 350 m.insert(b"include:".as_ref(), PatternSyntax::Include);
351 351 m.insert(b"subinclude:".as_ref(), PatternSyntax::SubInclude);
352 352
353 353 m
354 354 };
355 355 }
356 356
357 357 #[derive(Debug)]
358 358 pub enum PatternFileWarning {
359 359 /// (file path, syntax bytes)
360 360 InvalidSyntax(PathBuf, Vec<u8>),
361 361 /// File path
362 362 NoSuchFile(PathBuf),
363 363 }
364 364
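/// Parse a single pattern, honoring an explicit `kind:` prefix (e.g.
/// `glob:`) and falling back to `default` otherwise; path-like patterns are
/// normalized when `normalize` is true.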
365 365 pub fn parse_one_pattern(
366 366 pattern: &[u8],
367 367 source: &Path,
368 368 default: PatternSyntax,
369 369 normalize: bool,
370 370 ) -> IgnorePattern {
371 371 let mut pattern_bytes: &[u8] = pattern;
372 372 let mut syntax = default;
373 373
374 374 for (s, val) in SYNTAXES.iter() {
375 375 if let Some(rest) = pattern_bytes.drop_prefix(s) {
376 376 syntax = val.clone();
377 377 pattern_bytes = rest;
378 378 break;
379 379 }
380 380 }
381 381
382 382 let pattern = match syntax {
383 383 PatternSyntax::RootGlob
384 384 | PatternSyntax::Path
385 385 | PatternSyntax::Glob
386 386 | PatternSyntax::RelGlob
387 387 | PatternSyntax::RelPath
388 388 | PatternSyntax::RootFiles
389 389 if normalize =>
390 390 {
391 391 normalize_path_bytes(pattern_bytes)
392 392 }
393 393 _ => pattern_bytes.to_vec(),
394 394 };
395 395
396 396 IgnorePattern {
397 397 syntax,
398 398 pattern,
399 399 source: source.to_owned(),
400 400 }
401 401 }
402 402
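/// Parse the contents of a pattern file (e.g. an ignore file): `syntax:`
/// lines switch the current default syntax, `#` comments are stripped
/// (a literal `#` can be escaped as `\#`), and the parsed patterns are
/// returned along with any warnings.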
403 403 pub fn parse_pattern_file_contents(
404 404 lines: &[u8],
405 405 file_path: &Path,
406 406 default_syntax_override: Option<PatternSyntax>,
407 407 warn: bool,
408 408 relativize: bool,
409 409 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
410 410 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
411 411
412 412 #[allow(clippy::trivial_regex)]
413 413 let comment_escape_regex = Regex::new(r"\\#").unwrap();
414 414 let mut inputs: Vec<IgnorePattern> = vec![];
415 415 let mut warnings: Vec<PatternFileWarning> = vec![];
416 416
417 417 let mut current_syntax =
418 418 default_syntax_override.unwrap_or(PatternSyntax::RelRegexp);
419 419
420 420 for mut line in lines.split(|c| *c == b'\n') {
421 421 let line_buf;
422 422 if line.contains(&b'#') {
423 423 if let Some(cap) = comment_regex.captures(line) {
424 424 line = &line[..cap.get(1).unwrap().end()]
425 425 }
426 426 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
427 427 line = &line_buf;
428 428 }
429 429
430 430 let line = line.trim_end();
431 431
432 432 if line.is_empty() {
433 433 continue;
434 434 }
435 435
436 436 if let Some(syntax) = line.drop_prefix(b"syntax:") {
437 437 let syntax = syntax.trim();
438 438
439 439 if let Some(parsed) =
440 440 SYNTAXES.get([syntax, &b":"[..]].concat().as_slice())
441 441 {
442 442 current_syntax = parsed.clone();
443 443 } else if warn {
444 444 warnings.push(PatternFileWarning::InvalidSyntax(
445 445 file_path.to_owned(),
446 446 syntax.to_owned(),
447 447 ));
448 448 }
449 449 } else {
450 450 let pattern = parse_one_pattern(
451 451 line,
452 452 file_path,
453 453 current_syntax.clone(),
454 454 false,
455 455 );
456 456 inputs.push(if relativize {
457 457 pattern.to_relative()
458 458 } else {
459 459 pattern
460 460 })
461 461 }
462 462 }
463 463 Ok((inputs, warnings))
464 464 }
465 465
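/// Parse patterns given on the command line, resolving relative paths and
/// globs against `cwd` within `root` (the recorded source is `<args>`).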
466 466 pub fn parse_pattern_args(
467 467 patterns: Vec<Vec<u8>>,
468 468 cwd: &Path,
469 469 root: &Path,
470 470 ) -> Result<Vec<IgnorePattern>, HgPathError> {
471 471 let mut ignore_patterns: Vec<IgnorePattern> = Vec::new();
472 472 for pattern in patterns {
473 473 let pattern = parse_one_pattern(
474 474 &pattern,
475 475 Path::new("<args>"),
476 476 PatternSyntax::RelPath,
477 477 true,
478 478 );
479 479 match pattern.syntax {
480 480 PatternSyntax::RelGlob | PatternSyntax::RelPath => {
481 481 let name = get_path_from_bytes(&pattern.pattern);
482 482 let canon = canonical_path(root, cwd, name)?;
483 483 ignore_patterns.push(IgnorePattern {
484 484 syntax: pattern.syntax,
485 485 pattern: get_bytes_from_path(canon),
486 486 source: pattern.source,
487 487 })
488 488 }
489 489 _ => ignore_patterns.push(pattern.to_owned()),
490 490 };
491 491 }
492 492 Ok(ignore_patterns)
493 493 }
494 494
495 495 pub fn read_pattern_file(
496 496 file_path: &Path,
497 497 warn: bool,
498 498 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
499 499 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
500 500 match std::fs::read(file_path) {
501 501 Ok(contents) => {
502 502 inspect_pattern_bytes(file_path, &contents);
503 503 parse_pattern_file_contents(&contents, file_path, None, warn, true)
504 504 }
505 505 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
506 506 vec![],
507 507 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
508 508 )),
509 509 Err(e) => Err(e.into()),
510 510 }
511 511 }
512 512
513 513 /// Represents an entry in an "ignore" file.
514 514 #[derive(Debug, Eq, PartialEq, Clone)]
515 515 pub struct IgnorePattern {
516 516 pub syntax: PatternSyntax,
517 517 pub pattern: Vec<u8>,
518 518 pub source: PathBuf,
519 519 }
520 520
521 521 impl IgnorePattern {
522 522 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
523 523 Self {
524 524 syntax,
525 525 pattern: pattern.to_owned(),
526 526 source: source.to_owned(),
527 527 }
528 528 }
529 529
530 530 pub fn to_relative(self) -> Self {
531 531 let Self {
532 532 syntax,
533 533 pattern,
534 534 source,
535 535 } = self;
536 536 Self {
537 537 syntax: match syntax {
538 538 PatternSyntax::Regexp => PatternSyntax::RelRegexp,
539 539 PatternSyntax::Glob => PatternSyntax::RelGlob,
540 540 x => x,
541 541 },
542 542 pattern,
543 543 source,
544 544 }
545 545 }
546 546 }
547 547
548 548 pub type PatternResult<T> = Result<T, PatternError>;
549 549
550 550 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
551 551 /// and `subinclude:` patterns.
552 552 ///
553 553 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
554 554 /// is used for the latter to form a tree of patterns.
555 555 pub fn get_patterns_from_file(
556 556 pattern_file: &Path,
557 557 root_dir: &Path,
558 558 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
559 559 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
560 560 let (patterns, mut warnings) =
561 561 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
562 562 let patterns = patterns
563 563 .into_iter()
564 564 .flat_map(|entry| -> PatternResult<_> {
565 565 Ok(match &entry.syntax {
566 566 PatternSyntax::Include => {
567 567 let inner_include =
568 568 root_dir.join(get_path_from_bytes(&entry.pattern));
569 569 let (inner_pats, inner_warnings) = get_patterns_from_file(
570 570 &inner_include,
571 571 root_dir,
572 572 inspect_pattern_bytes,
573 573 )?;
574 574 warnings.extend(inner_warnings);
575 575 inner_pats
576 576 }
577 577 PatternSyntax::SubInclude => {
578 578 let mut sub_include = SubInclude::new(
579 579 root_dir,
580 580 &entry.pattern,
581 581 &entry.source,
582 582 )?;
583 583 let (inner_patterns, inner_warnings) =
584 584 get_patterns_from_file(
585 585 &sub_include.path,
586 586 &sub_include.root,
587 587 inspect_pattern_bytes,
588 588 )?;
589 589 sub_include.included_patterns = inner_patterns;
590 590 warnings.extend(inner_warnings);
591 591 vec![IgnorePattern {
592 592 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
593 593 sub_include,
594 594 )),
595 595 ..entry
596 596 }]
597 597 }
598 598 _ => vec![entry],
599 599 })
600 600 })
601 601 .flatten()
602 602 .collect();
603 603
604 604 Ok((patterns, warnings))
605 605 }
606 606
607 607 /// Holds all the information needed to handle a `subinclude:` pattern.
608 608 #[derive(Debug, PartialEq, Eq, Clone)]
609 609 pub struct SubInclude {
610 610 /// Will be used for repository (hg) paths that start with this prefix.
611 611 /// It is relative to the current working directory, so comparing against
612 612 /// repository paths is painless.
613 613 pub prefix: HgPathBuf,
614 614 /// The file itself, containing the patterns
615 615 pub path: PathBuf,
616 616 /// Folder in the filesystem where this applies
617 617 pub root: PathBuf,
618 618
619 619 pub included_patterns: Vec<IgnorePattern>,
620 620 }
621 621
622 622 impl SubInclude {
623 623 pub fn new(
624 624 root_dir: &Path,
625 625 pattern: &[u8],
626 626 source: &Path,
627 627 ) -> Result<SubInclude, HgPathError> {
628 628 let normalized_source =
629 629 normalize_path_bytes(&get_bytes_from_path(source));
630 630
631 631 let source_root = get_path_from_bytes(&normalized_source);
632 let source_root =
633 source_root.parent().unwrap_or_else(|| source_root.deref());
632 let source_root = source_root.parent().unwrap_or(source_root);
634 633
635 634 let path = source_root.join(get_path_from_bytes(pattern));
636 635 let new_root = path.parent().unwrap_or_else(|| path.deref());
637 636
638 637 let prefix = canonical_path(root_dir, root_dir, new_root)?;
639 638
640 639 Ok(Self {
641 640 prefix: path_to_hg_path_buf(prefix).map(|mut p| {
642 641 if !p.is_empty() {
643 642 p.push_byte(b'/');
644 643 }
645 644 p
646 645 })?,
647 646 path: path.to_owned(),
648 647 root: new_root.to_owned(),
649 648 included_patterns: Vec::new(),
650 649 })
651 650 }
652 651 }
653 652
654 653 /// Separate and pre-process subincludes from other patterns for the "ignore"
655 654 /// phase.
656 655 pub fn filter_subincludes(
657 656 ignore_patterns: Vec<IgnorePattern>,
658 657 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
659 658 let mut subincludes = vec![];
660 659 let mut others = vec![];
661 660
662 661 for pattern in ignore_patterns {
663 662 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
664 663 {
665 664 subincludes.push(*sub_include);
666 665 } else {
667 666 others.push(pattern)
668 667 }
669 668 }
670 669 Ok((subincludes, others))
671 670 }
672 671
673 672 #[cfg(test)]
674 673 mod tests {
675 674 use super::*;
676 675 use pretty_assertions::assert_eq;
677 676
678 677 #[test]
679 678 fn escape_pattern_test() {
680 679 let untouched =
681 680 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
682 681 assert_eq!(escape_pattern(untouched), untouched.to_vec());
683 682 // All escape codes
684 683 assert_eq!(
685 escape_pattern(br#"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"#),
686 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f"#
687 .to_vec()
684 escape_pattern(br"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"),
685 br"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f".to_vec()
688 686 );
689 687 }
690 688
691 689 #[test]
692 690 fn glob_test() {
693 assert_eq!(glob_to_re(br#"?"#), br#"."#);
694 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
695 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
696 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
697 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
698 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
699 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
700 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
691 assert_eq!(glob_to_re(br"?"), br".");
692 assert_eq!(glob_to_re(br"*"), br"[^/]*");
693 assert_eq!(glob_to_re(br"**"), br".*");
694 assert_eq!(glob_to_re(br"**/a"), br"(?:.*/)?a");
695 assert_eq!(glob_to_re(br"a/**/b"), br"a/(?:.*/)?b");
696 assert_eq!(glob_to_re(br"[a*?!^][^b][!c]"), br"[a*?!^][\^b][^c]");
697 assert_eq!(glob_to_re(br"{a,b}"), br"(?:a|b)");
698 assert_eq!(glob_to_re(br".\*\?"), br"\.\*\?");
701 699 }
702 700
703 701 #[test]
704 702 fn test_parse_pattern_file_contents() {
705 703 let lines = b"syntax: glob\n*.elc";
706 704
707 705 assert_eq!(
708 706 parse_pattern_file_contents(
709 707 lines,
710 708 Path::new("file_path"),
711 709 None,
712 710 false,
713 711 true,
714 712 )
715 713 .unwrap()
716 714 .0,
717 715 vec![IgnorePattern::new(
718 716 PatternSyntax::RelGlob,
719 717 b"*.elc",
720 718 Path::new("file_path")
721 719 )],
722 720 );
723 721
724 722 let lines = b"syntax: include\nsyntax: glob";
725 723
726 724 assert_eq!(
727 725 parse_pattern_file_contents(
728 726 lines,
729 727 Path::new("file_path"),
730 728 None,
731 729 false,
732 730 true,
733 731 )
734 732 .unwrap()
735 733 .0,
736 734 vec![]
737 735 );
738 736 let lines = b"glob:**.o";
739 737 assert_eq!(
740 738 parse_pattern_file_contents(
741 739 lines,
742 740 Path::new("file_path"),
743 741 None,
744 742 false,
745 743 true,
746 744 )
747 745 .unwrap()
748 746 .0,
749 747 vec![IgnorePattern::new(
750 748 PatternSyntax::RelGlob,
751 749 b"**.o",
752 750 Path::new("file_path")
753 751 )]
754 752 );
755 753 }
756 754
757 755 #[test]
758 756 fn test_build_single_regex() {
759 757 assert_eq!(
760 758 build_single_regex(
761 759 &IgnorePattern::new(
762 760 PatternSyntax::RelGlob,
763 761 b"rust/target/",
764 762 Path::new("")
765 763 ),
766 764 b"(?:/|$)"
767 765 )
768 766 .unwrap(),
769 767 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
770 768 );
771 769 assert_eq!(
772 770 build_single_regex(
773 771 &IgnorePattern::new(
774 772 PatternSyntax::Regexp,
775 773 br"rust/target/\d+",
776 774 Path::new("")
777 775 ),
778 776 b"(?:/|$)"
779 777 )
780 778 .unwrap(),
781 779 Some(br"rust/target/\d+".to_vec()),
782 780 );
783 781 }
784 782
785 783 #[test]
786 784 fn test_build_single_regex_shortcut() {
787 785 assert_eq!(
788 786 build_single_regex(
789 787 &IgnorePattern::new(
790 788 PatternSyntax::RootGlob,
791 789 b"",
792 790 Path::new("")
793 791 ),
794 792 b"(?:/|$)"
795 793 )
796 794 .unwrap(),
797 795 None,
798 796 );
799 797 assert_eq!(
800 798 build_single_regex(
801 799 &IgnorePattern::new(
802 800 PatternSyntax::RootGlob,
803 801 b"whatever",
804 802 Path::new("")
805 803 ),
806 804 b"(?:/|$)"
807 805 )
808 806 .unwrap(),
809 807 None,
810 808 );
811 809 assert_eq!(
812 810 build_single_regex(
813 811 &IgnorePattern::new(
814 812 PatternSyntax::RootGlob,
815 813 b"*.o",
816 814 Path::new("")
817 815 ),
818 816 b"(?:/|$)"
819 817 )
820 818 .unwrap(),
821 819 Some(br"[^/]*\.o(?:/|$)".to_vec()),
822 820 );
823 821 }
824 822
825 823 #[test]
826 824 fn test_build_single_relregex() {
827 825 assert_eq!(
828 826 build_single_regex(
829 827 &IgnorePattern::new(
830 828 PatternSyntax::RelRegexp,
831 829 b"^ba{2}r",
832 830 Path::new("")
833 831 ),
834 832 b"(?:/|$)"
835 833 )
836 834 .unwrap(),
837 835 Some(b"^ba{2}r".to_vec()),
838 836 );
839 837 assert_eq!(
840 838 build_single_regex(
841 839 &IgnorePattern::new(
842 840 PatternSyntax::RelRegexp,
843 841 b"ba{2}r",
844 842 Path::new("")
845 843 ),
846 844 b"(?:/|$)"
847 845 )
848 846 .unwrap(),
849 847 Some(b".*ba{2}r".to_vec()),
850 848 );
851 849 assert_eq!(
852 850 build_single_regex(
853 851 &IgnorePattern::new(
854 852 PatternSyntax::RelRegexp,
855 853 b"(?ia)ba{2}r",
856 854 Path::new("")
857 855 ),
858 856 b"(?:/|$)"
859 857 )
860 858 .unwrap(),
861 859 Some(b"(?ia:.*ba{2}r)".to_vec()),
862 860 );
863 861 assert_eq!(
864 862 build_single_regex(
865 863 &IgnorePattern::new(
866 864 PatternSyntax::RelRegexp,
867 865 b"(?ia)^ba{2}r",
868 866 Path::new("")
869 867 ),
870 868 b"(?:/|$)"
871 869 )
872 870 .unwrap(),
873 871 Some(b"(?ia:^ba{2}r)".to_vec()),
874 872 );
875 873 }
876 874 }
@@ -1,2110 +1,2109 b''
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 use format_bytes::format_bytes;
11 11 use once_cell::sync::OnceCell;
12 12
13 13 use crate::{
14 14 dirstate::dirs_multiset::DirsChildrenMultiset,
15 15 filepatterns::{
16 16 build_single_regex, filter_subincludes, get_patterns_from_file,
17 17 PatternFileWarning, PatternResult,
18 18 },
19 19 utils::{
20 20 files::find_dirs,
21 21 hg_path::{HgPath, HgPathBuf, HgPathError},
22 22 Escaped,
23 23 },
24 24 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
25 25 };
26 26
27 27 use crate::dirstate::status::IgnoreFnType;
28 28 use crate::filepatterns::normalize_path_bytes;
29 29 use std::collections::HashSet;
30 30 use std::fmt::{Display, Error, Formatter};
31 use std::ops::Deref;
32 31 use std::path::{Path, PathBuf};
33 32 use std::{borrow::ToOwned, collections::BTreeSet};
34 33
35 34 #[derive(Debug, PartialEq)]
36 35 pub enum VisitChildrenSet {
37 36 /// Don't visit anything
38 37 Empty,
39 38 /// Only visit this directory
40 39 This,
41 40 /// Visit this directory and these subdirectories
42 41 /// TODO Should we implement a `NonEmptyHashSet`?
43 42 Set(HashSet<HgPathBuf>),
44 43 /// Visit this directory and all subdirectories
45 44 Recursive,
46 45 }
47 46
48 47 pub trait Matcher: core::fmt::Debug {
49 48 /// Explicitly listed files
50 49 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
51 50 /// Returns whether `filename` is in `file_set`
52 51 fn exact_match(&self, filename: &HgPath) -> bool;
53 52 /// Returns whether `filename` is matched by this matcher
54 53 fn matches(&self, filename: &HgPath) -> bool;
55 54 /// Decides whether a directory should be visited based on whether it
56 55 /// has potential matches in it or one of its subdirectories, and
57 56 /// potentially lists which subdirectories of that directory should be
58 57 /// visited. This is based on the match's primary, included, and excluded
59 58 /// patterns.
60 59 ///
61 60 /// # Example
62 61 ///
63 62 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
64 63 /// return the following values (assuming the implementation of
65 64 /// visit_children_set is capable of recognizing this; some implementations
66 65 /// are not).
67 66 ///
68 67 /// ```text
70 69 /// '' -> {'foo', 'qux'}
71 70 /// 'baz' -> set()
72 71 /// 'foo' -> {'bar'}
73 72 /// // Ideally this would be `Recursive`, but since the prefix nature of
74 73 /// // matchers is applied to the entire matcher, we have to downgrade this
75 74 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
76 75 /// // `RootFilesIn`-kind matcher being mixed in.
77 76 /// 'foo/bar' -> 'this'
78 77 /// 'qux' -> 'this'
79 78 /// ```
80 79 /// # Important
81 80 ///
82 81 /// Most matchers do not know if they're representing files or
83 82 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
84 83 /// file or a directory, so `visit_children_set('dir')` for most matchers
85 84 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
86 85 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
87 86 /// it may return `VisitChildrenSet::This`.
88 87 /// Do not rely on the return being a `HashSet` indicating that there are
89 88 /// no files in this dir to investigate (or equivalently that if there are
90 89 /// files to investigate in 'dir' that it will always return
91 90 /// `VisitChildrenSet::This`).
92 91 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
93 92 /// Matcher will match everything and `files_set()` will be empty:
94 93 /// optimization might be possible.
95 94 fn matches_everything(&self) -> bool;
96 95 /// Matcher will match exactly the files in `files_set()`: optimization
97 96 /// might be possible.
98 97 fn is_exact(&self) -> bool;
99 98 }
100 99
101 100 /// Matches everything.
102 101 ///```
103 102 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
104 103 ///
105 104 /// let matcher = AlwaysMatcher;
106 105 ///
107 106 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
108 107 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
109 108 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
110 109 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
111 110 /// ```
112 111 #[derive(Debug)]
113 112 pub struct AlwaysMatcher;
114 113
115 114 impl Matcher for AlwaysMatcher {
116 115 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
117 116 None
118 117 }
119 118 fn exact_match(&self, _filename: &HgPath) -> bool {
120 119 false
121 120 }
122 121 fn matches(&self, _filename: &HgPath) -> bool {
123 122 true
124 123 }
125 124 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
126 125 VisitChildrenSet::Recursive
127 126 }
128 127 fn matches_everything(&self) -> bool {
129 128 true
130 129 }
131 130 fn is_exact(&self) -> bool {
132 131 false
133 132 }
134 133 }
135 134
136 135 /// Matches nothing.
137 136 #[derive(Debug)]
138 137 pub struct NeverMatcher;
139 138
140 139 impl Matcher for NeverMatcher {
141 140 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
142 141 None
143 142 }
144 143 fn exact_match(&self, _filename: &HgPath) -> bool {
145 144 false
146 145 }
147 146 fn matches(&self, _filename: &HgPath) -> bool {
148 147 false
149 148 }
150 149 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
151 150 VisitChildrenSet::Empty
152 151 }
153 152 fn matches_everything(&self) -> bool {
154 153 false
155 154 }
156 155 fn is_exact(&self) -> bool {
157 156 true
158 157 }
159 158 }
160 159
161 160 /// Matches the input files exactly. They are interpreted as paths, not
162 161 /// patterns.
163 162 ///
164 163 ///```
165 164 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
166 165 ///
167 166 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
168 167 /// let matcher = FileMatcher::new(files).unwrap();
169 168 ///
170 169 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
171 170 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
172 171 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
173 172 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
174 173 /// ```
175 174 #[derive(Debug)]
176 175 pub struct FileMatcher {
177 176 files: HashSet<HgPathBuf>,
178 177 dirs: DirsMultiset,
179 178 sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
180 179 }
181 180
182 181 impl FileMatcher {
183 182 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
184 183 let dirs = DirsMultiset::from_manifest(&files)?;
185 184 Ok(Self {
186 files: HashSet::from_iter(files.into_iter()),
185 files: HashSet::from_iter(files),
187 186 dirs,
188 187 sorted_visitchildrenset_candidates: OnceCell::new(),
189 188 })
190 189 }
191 190 fn inner_matches(&self, filename: &HgPath) -> bool {
192 191 self.files.contains(filename.as_ref())
193 192 }
194 193 }
195 194
196 195 impl Matcher for FileMatcher {
197 196 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
198 197 Some(&self.files)
199 198 }
200 199 fn exact_match(&self, filename: &HgPath) -> bool {
201 200 self.inner_matches(filename)
202 201 }
203 202 fn matches(&self, filename: &HgPath) -> bool {
204 203 self.inner_matches(filename)
205 204 }
206 205 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
207 206 if self.files.is_empty() || !self.dirs.contains(directory) {
208 207 return VisitChildrenSet::Empty;
209 208 }
210 209
211 210 let compute_candidates = || -> BTreeSet<HgPathBuf> {
212 211 let mut candidates: BTreeSet<HgPathBuf> =
213 212 self.dirs.iter().cloned().collect();
214 213 candidates.extend(self.files.iter().cloned());
215 214 candidates.remove(HgPath::new(b""));
216 215 candidates
217 216 };
218 217 let candidates =
219 218 if directory.as_ref().is_empty() {
220 219 compute_candidates()
221 220 } else {
222 221 let sorted_candidates = self
223 222 .sorted_visitchildrenset_candidates
224 223 .get_or_init(compute_candidates);
225 224 let directory_bytes = directory.as_ref().as_bytes();
226 225 let start: HgPathBuf =
227 226 format_bytes!(b"{}/", directory_bytes).into();
228 227 let start_len = start.len();
229 228 // `0` sorts after `/`
230 229 let end = format_bytes!(b"{}0", directory_bytes).into();
231 230 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
232 231 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
233 232 ))
234 233 };
235 234
236 235 // `self.dirs` includes all of the directories, recursively, so if
237 236 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
238 237 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
239 238 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
240 239 // subdir will be in there without a slash.
241 240 VisitChildrenSet::Set(
242 241 candidates
243 242 .into_iter()
244 243 .filter_map(|c| {
245 244 if c.bytes().all(|b| *b != b'/') {
246 245 Some(c)
247 246 } else {
248 247 None
249 248 }
250 249 })
251 250 .collect(),
252 251 )
253 252 }
254 253 fn matches_everything(&self) -> bool {
255 254 false
256 255 }
257 256 fn is_exact(&self) -> bool {
258 257 true
259 258 }
260 259 }
261 260
262 261 /// Matches a set of (kind, pat, source) against a 'root' directory.
263 262 /// (Currently the 'root' directory is effectively always empty)
264 263 /// ```
265 264 /// use hg::{
266 265 /// matchers::{PatternMatcher, Matcher},
267 266 /// IgnorePattern,
268 267 /// PatternSyntax,
269 268 /// utils::hg_path::{HgPath, HgPathBuf}
270 269 /// };
271 270 /// use std::collections::HashSet;
272 271 /// use std::path::Path;
273 272 /// ///
274 273 /// let ignore_patterns : Vec<IgnorePattern> =
275 274 /// vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")),
276 275 /// IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")),
277 276 /// IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")),
278 277 /// IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")),
279 278 /// ];
280 279 /// let matcher = PatternMatcher::new(ignore_patterns).unwrap();
281 280 /// ///
282 281 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$
283 282 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
284 283 /// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a
285 284 /// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo'
286 285 /// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo'
287 286 /// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h
288 287 /// assert_eq!(matcher.file_set().unwrap(),
289 288 /// &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"),
290 289 /// HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")]));
291 290 /// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true);
292 291 /// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true);
293 292 /// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
294 293 /// ```
295 294 pub struct PatternMatcher<'a> {
296 295 patterns: Vec<u8>,
297 296 match_fn: IgnoreFnType<'a>,
298 297 /// Whether all the patterns match a prefix (i.e. recursively)
299 298 prefix: bool,
300 299 files: HashSet<HgPathBuf>,
301 300 dirs: DirsMultiset,
302 301 }
303 302
304 303 impl core::fmt::Debug for PatternMatcher<'_> {
305 304 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
306 305 f.debug_struct("PatternMatcher")
307 306 .field("patterns", &String::from_utf8_lossy(&self.patterns))
308 307 .field("prefix", &self.prefix)
309 308 .field("files", &self.files)
310 309 .field("dirs", &self.dirs)
311 310 .finish()
312 311 }
313 312 }
314 313
315 314 impl<'a> PatternMatcher<'a> {
316 315 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
317 316 let (files, _) = roots_and_dirs(&ignore_patterns);
318 317 let dirs = DirsMultiset::from_manifest(&files)?;
319 let files: HashSet<HgPathBuf> = HashSet::from_iter(files.into_iter());
318 let files: HashSet<HgPathBuf> = HashSet::from_iter(files);
320 319
321 320 let prefix = ignore_patterns.iter().all(|k| {
322 321 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
323 322 });
324 323 let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
325 324
326 325 Ok(Self {
327 326 patterns,
328 327 match_fn,
329 328 prefix,
330 329 files,
331 330 dirs,
332 331 })
333 332 }
334 333 }
335 334
336 335 impl<'a> Matcher for PatternMatcher<'a> {
337 336 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
338 337 Some(&self.files)
339 338 }
340 339
341 340 fn exact_match(&self, filename: &HgPath) -> bool {
342 341 self.files.contains(filename)
343 342 }
344 343
345 344 fn matches(&self, filename: &HgPath) -> bool {
346 345 if self.files.contains(filename) {
347 346 return true;
348 347 }
349 348 (self.match_fn)(filename)
350 349 }
351 350
352 351 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
353 352 if self.prefix && self.files.contains(directory) {
354 353 return VisitChildrenSet::Recursive;
355 354 }
356 355 let path_or_parents_in_set = find_dirs(directory)
357 356 .any(|parent_dir| self.files.contains(parent_dir));
358 357 if self.dirs.contains(directory) || path_or_parents_in_set {
359 358 VisitChildrenSet::This
360 359 } else {
361 360 VisitChildrenSet::Empty
362 361 }
363 362 }
364 363
365 364 fn matches_everything(&self) -> bool {
366 365 false
367 366 }
368 367
369 368 fn is_exact(&self) -> bool {
370 369 false
371 370 }
372 371 }
373 372
374 373 /// Matches files that are included in the ignore rules.
375 374 /// ```
376 375 /// use hg::{
377 376 /// matchers::{IncludeMatcher, Matcher},
378 377 /// IgnorePattern,
379 378 /// PatternSyntax,
380 379 /// utils::hg_path::HgPath
381 380 /// };
382 381 /// use std::path::Path;
383 382 /// ///
384 383 /// let ignore_patterns =
385 384 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
386 385 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
387 386 /// ///
388 387 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
389 388 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
390 389 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
391 390 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
392 391 /// ```
393 392 pub struct IncludeMatcher<'a> {
394 393 patterns: Vec<u8>,
395 394 match_fn: IgnoreFnType<'a>,
396 395 /// Whether all the patterns match a prefix (i.e. recursively)
397 396 prefix: bool,
398 397 roots: HashSet<HgPathBuf>,
399 398 dirs: HashSet<HgPathBuf>,
400 399 parents: HashSet<HgPathBuf>,
401 400 }
402 401
403 402 impl core::fmt::Debug for IncludeMatcher<'_> {
404 403 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
405 404 f.debug_struct("IncludeMatcher")
406 405 .field("patterns", &String::from_utf8_lossy(&self.patterns))
407 406 .field("prefix", &self.prefix)
408 407 .field("roots", &self.roots)
409 408 .field("dirs", &self.dirs)
410 409 .field("parents", &self.parents)
411 410 .finish()
412 411 }
413 412 }
414 413
415 414 impl<'a> Matcher for IncludeMatcher<'a> {
416 415 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
417 416 None
418 417 }
419 418
420 419 fn exact_match(&self, _filename: &HgPath) -> bool {
421 420 false
422 421 }
423 422
424 423 fn matches(&self, filename: &HgPath) -> bool {
425 424 (self.match_fn)(filename)
426 425 }
427 426
428 427 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
429 428 let dir = directory;
430 429 if self.prefix && self.roots.contains(dir) {
431 430 return VisitChildrenSet::Recursive;
432 431 }
433 432 if self.roots.contains(HgPath::new(b""))
434 433 || self.roots.contains(dir)
435 434 || self.dirs.contains(dir)
436 435 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
437 436 {
438 437 return VisitChildrenSet::This;
439 438 }
440 439
441 440 if self.parents.contains(dir.as_ref()) {
442 441 let multiset = self.get_all_parents_children();
443 442 if let Some(children) = multiset.get(dir) {
444 443 return VisitChildrenSet::Set(
445 444 children.iter().map(HgPathBuf::from).collect(),
446 445 );
447 446 }
448 447 }
449 448 VisitChildrenSet::Empty
450 449 }
451 450
452 451 fn matches_everything(&self) -> bool {
453 452 false
454 453 }
455 454
456 455 fn is_exact(&self) -> bool {
457 456 false
458 457 }
459 458 }
460 459
461 460 /// The union of multiple matchers. Will match if any of the matchers match.
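///
/// A minimal illustrative sketch, combining the `AlwaysMatcher` and
/// `NeverMatcher` defined above:
/// ```
/// use hg::matchers::{Matcher, UnionMatcher, AlwaysMatcher, NeverMatcher};
/// use hg::utils::hg_path::HgPath;
///
/// let matchers: Vec<Box<dyn Matcher + Sync>> =
///     vec![Box::new(NeverMatcher), Box::new(AlwaysMatcher)];
/// let matcher = UnionMatcher::new(matchers);
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// ```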
462 461 #[derive(Debug)]
463 462 pub struct UnionMatcher {
464 463 matchers: Vec<Box<dyn Matcher + Sync>>,
465 464 }
466 465
467 466 impl Matcher for UnionMatcher {
468 467 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
469 468 None
470 469 }
471 470
472 471 fn exact_match(&self, _filename: &HgPath) -> bool {
473 472 false
474 473 }
475 474
476 475 fn matches(&self, filename: &HgPath) -> bool {
477 476 self.matchers.iter().any(|m| m.matches(filename))
478 477 }
479 478
480 479 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
481 480 let mut result = HashSet::new();
482 481 let mut this = false;
483 482 for matcher in self.matchers.iter() {
484 483 let visit = matcher.visit_children_set(directory);
485 484 match visit {
486 485 VisitChildrenSet::Empty => continue,
487 486 VisitChildrenSet::This => {
488 487 this = true;
489 488 // Don't break, we might have an 'all' in here.
490 489 continue;
491 490 }
492 491 VisitChildrenSet::Set(set) => {
493 492 result.extend(set);
494 493 }
495 494 VisitChildrenSet::Recursive => {
496 495 return visit;
497 496 }
498 497 }
499 498 }
500 499 if this {
501 500 return VisitChildrenSet::This;
502 501 }
503 502 if result.is_empty() {
504 503 VisitChildrenSet::Empty
505 504 } else {
506 505 VisitChildrenSet::Set(result)
507 506 }
508 507 }
509 508
510 509 fn matches_everything(&self) -> bool {
511 510 // TODO Maybe if all are AlwaysMatcher?
512 511 false
513 512 }
514 513
515 514 fn is_exact(&self) -> bool {
516 515 false
517 516 }
518 517 }
519 518
520 519 impl UnionMatcher {
521 520 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
522 521 Self { matchers }
523 522 }
524 523 }
525 524
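/// The intersection of two matchers: matches only the files that both
/// `m1` and `m2` match.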
526 525 #[derive(Debug)]
527 526 pub struct IntersectionMatcher {
528 527 m1: Box<dyn Matcher + Sync>,
529 528 m2: Box<dyn Matcher + Sync>,
530 529 files: Option<HashSet<HgPathBuf>>,
531 530 }
532 531
533 532 impl Matcher for IntersectionMatcher {
534 533 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
535 534 self.files.as_ref()
536 535 }
537 536
538 537 fn exact_match(&self, filename: &HgPath) -> bool {
539 538 self.files.as_ref().map_or(false, |f| f.contains(filename))
540 539 }
541 540
542 541 fn matches(&self, filename: &HgPath) -> bool {
543 542 self.m1.matches(filename) && self.m2.matches(filename)
544 543 }
545 544
546 545 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
547 546 let m1_set = self.m1.visit_children_set(directory);
548 547 if m1_set == VisitChildrenSet::Empty {
549 548 return VisitChildrenSet::Empty;
550 549 }
551 550 let m2_set = self.m2.visit_children_set(directory);
552 551 if m2_set == VisitChildrenSet::Empty {
553 552 return VisitChildrenSet::Empty;
554 553 }
555 554
556 555 if m1_set == VisitChildrenSet::Recursive {
557 556 return m2_set;
558 557 } else if m2_set == VisitChildrenSet::Recursive {
559 558 return m1_set;
560 559 }
561 560
562 561 match (&m1_set, &m2_set) {
563 562 (VisitChildrenSet::Recursive, _) => m2_set,
564 563 (_, VisitChildrenSet::Recursive) => m1_set,
565 564 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
566 565 VisitChildrenSet::This
567 566 }
568 567 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
569 568 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
570 569 if set.is_empty() {
571 570 VisitChildrenSet::Empty
572 571 } else {
573 572 VisitChildrenSet::Set(set)
574 573 }
575 574 }
576 575 _ => unreachable!(),
577 576 }
578 577 }
579 578
580 579 fn matches_everything(&self) -> bool {
581 580 self.m1.matches_everything() && self.m2.matches_everything()
582 581 }
583 582
584 583 fn is_exact(&self) -> bool {
585 584 self.m1.is_exact() || self.m2.is_exact()
586 585 }
587 586 }
588 587
589 588 impl IntersectionMatcher {
590 589 pub fn new(
591 590 mut m1: Box<dyn Matcher + Sync>,
592 591 mut m2: Box<dyn Matcher + Sync>,
593 592 ) -> Self {
594 593 let files = if m1.is_exact() || m2.is_exact() {
595 594 if !m1.is_exact() {
596 595 std::mem::swap(&mut m1, &mut m2);
597 596 }
598 597 m1.file_set().map(|m1_files| {
599 598 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
600 599 })
601 600 } else {
602 601 // without exact input file sets, we can't do an exact
603 602 // intersection, so we must over-approximate by
604 603 // unioning instead
605 604 m1.file_set().map(|m1_files| match m2.file_set() {
606 605 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
607 606 None => m1_files.iter().cloned().collect(),
608 607 })
609 608 };
610 609 Self { m1, m2, files }
611 610 }
612 611 }
613 612
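/// Matches everything that `base` matches, except what `excluded` matches.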
614 613 #[derive(Debug)]
615 614 pub struct DifferenceMatcher {
616 615 base: Box<dyn Matcher + Sync>,
617 616 excluded: Box<dyn Matcher + Sync>,
618 617 files: Option<HashSet<HgPathBuf>>,
619 618 }
620 619
621 620 impl Matcher for DifferenceMatcher {
622 621 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
623 622 self.files.as_ref()
624 623 }
625 624
626 625 fn exact_match(&self, filename: &HgPath) -> bool {
627 626 self.files.as_ref().map_or(false, |f| f.contains(filename))
628 627 }
629 628
630 629 fn matches(&self, filename: &HgPath) -> bool {
631 630 self.base.matches(filename) && !self.excluded.matches(filename)
632 631 }
633 632
634 633 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
635 634 let excluded_set = self.excluded.visit_children_set(directory);
636 635 if excluded_set == VisitChildrenSet::Recursive {
637 636 return VisitChildrenSet::Empty;
638 637 }
639 638 let base_set = self.base.visit_children_set(directory);
640 639 // Possible values for base: 'recursive', 'this', set(...), set()
641 640 // Possible values for excluded: 'this', set(...), set()
642 641 // If excluded has nothing under here that we care about, return base,
643 642 // even if it's 'recursive'.
644 643 if excluded_set == VisitChildrenSet::Empty {
645 644 return base_set;
646 645 }
647 646 match base_set {
648 647 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
649 648 // Never return 'recursive' here if excluded_set is any kind of
650 649 // non-empty (either 'this' or set(foo)), since excluded might
651 650 // return set() for a subdirectory.
652 651 VisitChildrenSet::This
653 652 }
654 653 set => {
655 654 // Possible values for base: set(...), set()
656 655 // Possible values for excluded: 'this', set(...)
657 656 // We ignore excluded set results. They're possibly incorrect:
658 657 // base = path:dir/subdir
659 658 // excluded=rootfilesin:dir,
660 659 // visit_children_set(''):
661 660 // base returns {'dir'}, excluded returns {'dir'}, if we
662 661 // subtracted we'd return set(), which is *not* correct, we
663 662 // still need to visit 'dir'!
664 663 set
665 664 }
666 665 }
667 666 }
668 667
669 668 fn matches_everything(&self) -> bool {
670 669 false
671 670 }
672 671
673 672 fn is_exact(&self) -> bool {
674 673 self.base.is_exact()
675 674 }
676 675 }
677 676
678 677 impl DifferenceMatcher {
679 678 pub fn new(
680 679 base: Box<dyn Matcher + Sync>,
681 680 excluded: Box<dyn Matcher + Sync>,
682 681 ) -> Self {
683 682 let base_is_exact = base.is_exact();
684 683 let base_files = base.file_set().map(ToOwned::to_owned);
685 684 let mut new = Self {
686 685 base,
687 686 excluded,
688 687 files: None,
689 688 };
690 689 if base_is_exact {
691 690 new.files = base_files.map(|files| {
692 691 files.iter().cloned().filter(|f| new.matches(f)).collect()
693 692 });
694 693 }
695 694 new
696 695 }
697 696 }
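// --- Editor's illustrative sketch (not part of the original file) ---
// Hedged example for `DifferenceMatcher`: it keeps what `base` matches minus
// what `excluded` matches, as implemented in `matches` above. The helper
// function and sample paths are hypothetical.
#[cfg(test)]
fn _difference_matcher_sketch() {
    let base = Box::new(AlwaysMatcher);
    let excluded = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            std::path::Path::new(""),
        )])
        .unwrap(),
    );
    let matcher = DifferenceMatcher::new(base, excluded);
    // Everything matches except the excluded subtree.
    assert!(matcher.matches(HgPath::new(b"other/file.txt")));
    assert!(!matcher.matches(HgPath::new(b"dir/subdir/file.txt")));
}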
698 697
699 698 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
700 699 /// contexts.
701 700 ///
702 701 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
703 702 /// from many threads at once is prone to contention, probably within the
704 703 /// scratch space needed as the regex DFA is built lazily.
705 704 ///
706 705 /// We are in the process of raising the issue upstream, but for now
707 706 /// the workaround used here is to store the `Regex` in a lazily populated
708 707 /// thread-local variable, sharing the initial read-only compilation, but
709 708 /// not the lazy DFA scratch space mentioned above.
710 709 ///
711 710 /// This reduces the contention observed with 16+ threads, but does not
712 711 /// completely remove it. Hopefully this can be addressed upstream.
713 712 struct RegexMatcher {
714 713 /// Compiled at the start of the status algorithm, used as a base for
715 714 /// cloning in each thread-local `self.local`, thus sharing the expensive
716 715 /// first compilation.
717 716 base: regex::bytes::Regex,
718 717 /// Thread-local variable that holds the `Regex` that is actually queried
719 718 /// from each thread.
720 719 local: thread_local::ThreadLocal<regex::bytes::Regex>,
721 720 }
722 721
723 722 impl RegexMatcher {
724 723 /// Returns whether the path matches the stored `Regex`.
725 724 pub fn is_match(&self, path: &HgPath) -> bool {
726 725 self.local
727 726 .get_or(|| self.base.clone())
728 727 .is_match(path.as_bytes())
729 728 }
730 729 }
731 730
732 731 /// Returns a function that matches an `HgPath` against the given regex
733 732 /// pattern.
734 733 ///
735 734 /// This can fail when the pattern is invalid or not supported by the
736 735 /// underlying engine (the `regex` crate), for instance anything with
737 736 /// back-references.
738 737 #[logging_timer::time("trace")]
739 738 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
740 739 use std::io::Write;
741 740
742 741 // The `regex` crate adds `.*` to the start and end of expressions if there
743 742 // are no anchors, so add the start anchor.
744 743 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
745 744 for byte in pattern {
746 745 if *byte > 127 {
747 746 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
748 747 } else {
749 748 escaped_bytes.push(*byte);
750 749 }
751 750 }
752 751 escaped_bytes.push(b')');
753 752
754 753 // Avoid the cost of UTF8 checking
755 754 //
756 755 // # Safety
757 756 // This is safe because we escaped all non-ASCII bytes.
758 757 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
759 758 let re = regex::bytes::RegexBuilder::new(&pattern_string)
760 759 .unicode(false)
761 760 // Big repos with big `.hgignore` will hit the default limit and
762 761 // incur a significant performance hit. One repo's `hg status` took
763 762 // multiple *minutes*.
764 763 .dfa_size_limit(50 * (1 << 20))
765 764 .build()
766 765 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
767 766
768 767 Ok(RegexMatcher {
769 768 base: re,
770 769 local: Default::default(),
771 770 })
772 771 }
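// --- Editor's illustrative sketch (not part of the original file) ---
// Hedged example for `re_matcher`: the returned `RegexMatcher` clones the
// pre-compiled regex into a thread-local on first use in each thread (see
// `RegexMatcher::is_match` above), so the parallel `status` code can query it
// without sharing one scratch space. The pattern and paths are hypothetical;
// note that `re_matcher` only anchors the start of the pattern.
#[cfg(test)]
fn _re_matcher_sketch() -> PatternResult<()> {
    let matcher = re_matcher(br"dir/[^/]+\.rs")?;
    assert!(matcher.is_match(HgPath::new(b"dir/lib.rs")));
    assert!(!matcher.is_match(HgPath::new(b"other/lib.rs")));
    Ok(())
}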
773 772
774 773 /// Returns the regex pattern and a function that matches an `HgPath` against
775 774 /// said regex formed by the given ignore patterns.
776 fn build_regex_match<'a, 'b>(
777 ignore_patterns: &'a [IgnorePattern],
775 fn build_regex_match<'a>(
776 ignore_patterns: &[IgnorePattern],
778 777 glob_suffix: &[u8],
779 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
778 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
780 779 let mut regexps = vec![];
781 780 let mut exact_set = HashSet::new();
782 781
783 782 for pattern in ignore_patterns {
784 783 if let Some(re) = build_single_regex(pattern, glob_suffix)? {
785 784 regexps.push(re);
786 785 } else {
787 786 let exact = normalize_path_bytes(&pattern.pattern);
788 787 exact_set.insert(HgPathBuf::from_bytes(&exact));
789 788 }
790 789 }
791 790
792 791 let full_regex = regexps.join(&b'|');
793 792
794 793 // An empty pattern would cause the regex engine to incorrectly match the
795 794 // (empty) root directory
796 795 let func = if !(regexps.is_empty()) {
797 796 let matcher = re_matcher(&full_regex)?;
798 797 let func = move |filename: &HgPath| {
799 798 exact_set.contains(filename) || matcher.is_match(filename)
800 799 };
801 800 Box::new(func) as IgnoreFnType
802 801 } else {
803 802 let func = move |filename: &HgPath| exact_set.contains(filename);
804 803 Box::new(func) as IgnoreFnType
805 804 };
806 805
807 806 Ok((full_regex, func))
808 807 }
809 808
810 809 /// Returns roots and directories corresponding to each pattern.
811 810 ///
812 811 /// This calculates the roots and directories exactly matching the patterns and
813 812 /// returns a tuple of (roots, dirs). It does not return other directories
814 813 /// which may also need to be considered, like the parent directories.
815 814 fn roots_and_dirs(
816 815 ignore_patterns: &[IgnorePattern],
817 816 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
818 817 let mut roots = Vec::new();
819 818 let mut dirs = Vec::new();
820 819
821 820 for ignore_pattern in ignore_patterns {
822 821 let IgnorePattern {
823 822 syntax, pattern, ..
824 823 } = ignore_pattern;
825 824 match syntax {
826 825 PatternSyntax::RootGlob | PatternSyntax::Glob => {
827 826 let mut root = HgPathBuf::new();
828 827 for p in pattern.split(|c| *c == b'/') {
829 828 if p.iter()
830 829 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
831 830 {
832 831 break;
833 832 }
834 833 root.push(HgPathBuf::from_bytes(p).as_ref());
835 834 }
836 835 roots.push(root);
837 836 }
838 837 PatternSyntax::Path
839 838 | PatternSyntax::RelPath
840 839 | PatternSyntax::FilePath => {
841 840 let pat = HgPath::new(if pattern == b"." {
842 841 &[] as &[u8]
843 842 } else {
844 843 pattern
845 844 });
846 845 roots.push(pat.to_owned());
847 846 }
848 847 PatternSyntax::RootFiles => {
849 848 let pat = if pattern == b"." {
850 849 &[] as &[u8]
851 850 } else {
852 851 pattern
853 852 };
854 853 dirs.push(HgPathBuf::from_bytes(pat));
855 854 }
856 855 _ => {
857 856 roots.push(HgPathBuf::new());
858 857 }
859 858 }
860 859 }
861 860 (roots, dirs)
862 861 }
863 862
864 863 /// Paths extracted from patterns
865 864 #[derive(Debug, PartialEq)]
866 865 struct RootsDirsAndParents {
867 866 /// Directories to match recursively
868 867 pub roots: HashSet<HgPathBuf>,
869 868 /// Directories to match non-recursively
870 869 pub dirs: HashSet<HgPathBuf>,
871 870 /// Implicitly required directories to go to items in either roots or dirs
872 871 pub parents: HashSet<HgPathBuf>,
873 872 }
874 873
875 874 /// Extract roots, dirs and parents from patterns.
876 875 fn roots_dirs_and_parents(
877 876 ignore_patterns: &[IgnorePattern],
878 877 ) -> PatternResult<RootsDirsAndParents> {
879 878 let (roots, dirs) = roots_and_dirs(ignore_patterns);
880 879
881 880 let mut parents = HashSet::new();
882 881
883 882 parents.extend(
884 883 DirsMultiset::from_manifest(&dirs)?
885 884 .iter()
886 885 .map(ToOwned::to_owned),
887 886 );
888 887 parents.extend(
889 888 DirsMultiset::from_manifest(&roots)?
890 889 .iter()
891 890 .map(ToOwned::to_owned),
892 891 );
893 892
894 893 Ok(RootsDirsAndParents {
895 894 roots: HashSet::from_iter(roots),
896 895 dirs: HashSet::from_iter(dirs),
897 896 parents,
898 897 })
899 898 }
900 899
901 900 /// Returns a function that checks whether a given file (in the general sense)
902 901 /// should be matched.
903 902 fn build_match<'a>(
904 903 ignore_patterns: Vec<IgnorePattern>,
905 904 glob_suffix: &[u8],
906 905 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
907 906 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
908 907 // For debugging and printing
909 908 let mut patterns = vec![];
910 909
911 910 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
912 911
913 912 if !subincludes.is_empty() {
914 913 // Build prefix-based matcher functions for subincludes
915 914 let mut submatchers = FastHashMap::default();
916 915 let mut prefixes = vec![];
917 916
918 917 for sub_include in subincludes {
919 918 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
920 919 let match_fn =
921 920 Box::new(move |path: &HgPath| matcher.matches(path));
922 921 prefixes.push(sub_include.prefix.clone());
923 922 submatchers.insert(sub_include.prefix.clone(), match_fn);
924 923 }
925 924
926 925 let match_subinclude = move |filename: &HgPath| {
927 926 for prefix in prefixes.iter() {
928 927 if let Some(rel) = filename.relative_to(prefix) {
929 928 if (submatchers[prefix])(rel) {
930 929 return true;
931 930 }
932 931 }
933 932 }
934 933 false
935 934 };
936 935
937 936 match_funcs.push(Box::new(match_subinclude));
938 937 }
939 938
940 939 if !ignore_patterns.is_empty() {
941 940 // Either do dumb matching if all patterns are rootfiles, or match
942 941 // with a regex.
943 942 if ignore_patterns
944 943 .iter()
945 944 .all(|k| k.syntax == PatternSyntax::RootFiles)
946 945 {
947 946 let dirs: HashSet<_> = ignore_patterns
948 947 .iter()
949 948 .map(|k| k.pattern.to_owned())
950 949 .collect();
951 950 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
952 951
953 952 let match_func = move |path: &HgPath| -> bool {
954 953 let path = path.as_bytes();
955 954 let i = path.iter().rposition(|a| *a == b'/');
956 955 let dir = if let Some(i) = i {
957 956 &path[..i]
958 957 } else {
959 958 b"."
960 959 };
961 dirs.contains(dir.deref())
960 dirs.contains(dir)
962 961 };
963 962 match_funcs.push(Box::new(match_func));
964 963
965 964 patterns.extend(b"rootfilesin: ");
966 965 dirs_vec.sort();
967 966 patterns.extend(dirs_vec.escaped_bytes());
968 967 } else {
969 968 let (new_re, match_func) =
970 969 build_regex_match(&ignore_patterns, glob_suffix)?;
971 970 patterns = new_re;
972 971 match_funcs.push(match_func)
973 972 }
974 973 }
975 974
976 975 Ok(if match_funcs.len() == 1 {
977 976 (patterns, match_funcs.remove(0))
978 977 } else {
979 978 (
980 979 patterns,
981 980 Box::new(move |f: &HgPath| -> bool {
982 981 match_funcs.iter().any(|match_func| match_func(f))
983 982 }),
984 983 )
985 984 })
986 985 }
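// --- Editor's illustrative sketch (not part of the original file) ---
// Hedged example of the `rootfilesin` fast path described above: when every
// pattern uses `PatternSyntax::RootFiles`, matching is a plain lookup of the
// file's immediate parent directory and no regex is built. The helper
// function, glob suffix and sample paths are hypothetical; the expected
// results follow the intended `rootfilesin` (non-recursive) semantics.
#[cfg(test)]
fn _build_match_rootfilesin_sketch() -> PatternResult<()> {
    let patterns = vec![IgnorePattern::new(
        PatternSyntax::RootFiles,
        b"dir/subdir",
        std::path::Path::new(""),
    )];
    let (_debug_patterns, match_fn) = build_match(patterns, b"(?:/|$)")?;
    // Files directly inside `dir/subdir` match...
    assert!(match_fn(HgPath::new(b"dir/subdir/file.txt")));
    // ...but files in nested sub-directories do not.
    assert!(!match_fn(HgPath::new(b"dir/subdir/nested/file.txt")));
    Ok(())
}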
987 986
988 987 /// Parses all "ignore" files with their recursive includes and returns a
989 988 /// matcher that can tell whether a given file (in the general sense) should
990 989 /// be ignored.
991 990 pub fn get_ignore_matcher<'a>(
992 991 mut all_pattern_files: Vec<PathBuf>,
993 992 root_dir: &Path,
994 993 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
995 994 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
996 995 let mut all_patterns = vec![];
997 996 let mut all_warnings = vec![];
998 997
999 998 // Sort to make the ordering of calls to `inspect_pattern_bytes`
1000 999 // deterministic even if the ordering of `all_pattern_files` is not (such
1001 1000 // as when the iteration order of a Python dict or Rust HashMap is involved).
1002 1001 // Sort by "string" representation instead of the default component-wise
1003 1002 // ordering (with a Rust-specific definition of a component).
1004 1003 all_pattern_files
1005 1004 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
1006 1005
1007 1006 for pattern_file in &all_pattern_files {
1008 1007 let (patterns, warnings) = get_patterns_from_file(
1009 1008 pattern_file,
1010 1009 root_dir,
1011 1010 inspect_pattern_bytes,
1012 1011 )?;
1013 1012
1014 1013 all_patterns.extend(patterns.to_owned());
1015 1014 all_warnings.extend(warnings);
1016 1015 }
1017 1016 let matcher = IncludeMatcher::new(all_patterns)?;
1018 1017 Ok((matcher, all_warnings))
1019 1018 }
1020 1019
1021 1020 /// Parses all "ignore" files with their recursive includes and returns a
1022 1021 /// function that checks whether a given file (in the general sense) should be
1023 1022 /// ignored.
1024 1023 pub fn get_ignore_function<'a>(
1025 1024 all_pattern_files: Vec<PathBuf>,
1026 1025 root_dir: &Path,
1027 1026 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1028 1027 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
1029 1028 let res =
1030 1029 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
1031 1030 res.map(|(matcher, all_warnings)| {
1032 1031 let res: IgnoreFnType<'a> =
1033 1032 Box::new(move |path: &HgPath| matcher.matches(path));
1034 1033
1035 1034 (res, all_warnings)
1036 1035 })
1037 1036 }
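// --- Editor's illustrative sketch (not part of the original file) ---
// Hedged example of how a caller might obtain an ignore predicate via
// `get_ignore_function` above. The ignore-file path is hypothetical; the
// closure argument only observes raw pattern bytes and may be a no-op.
#[cfg(test)]
fn _get_ignore_function_sketch() -> PatternResult<()> {
    // A real caller would pass the paths of the repository's ignore files.
    let ignore_files = vec![std::path::PathBuf::from("/repo/.hgignore")];
    let root_dir = std::path::Path::new("/repo");
    let (ignore_fn, _warnings) = get_ignore_function(
        ignore_files,
        root_dir,
        &mut |_path: &Path, _bytes: &[u8]| {},
    )?;
    // The returned boxed closure answers "should this file be ignored?".
    let _is_ignored: bool = ignore_fn(HgPath::new(b"target/debug/build.log"));
    Ok(())
}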
1038 1037
1039 1038 impl<'a> IncludeMatcher<'a> {
1040 1039 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
1041 1040 let RootsDirsAndParents {
1042 1041 roots,
1043 1042 dirs,
1044 1043 parents,
1045 1044 } = roots_dirs_and_parents(&ignore_patterns)?;
1046 1045 let prefix = ignore_patterns.iter().all(|k| {
1047 1046 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
1048 1047 });
1049 1048 let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
1050 1049
1051 1050 Ok(Self {
1052 1051 patterns,
1053 1052 match_fn,
1054 1053 prefix,
1055 1054 roots,
1056 1055 dirs,
1057 1056 parents,
1058 1057 })
1059 1058 }
1060 1059
1061 1060 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
1062 1061 // TODO cache
1063 1062 let thing = self
1064 1063 .dirs
1065 1064 .iter()
1066 1065 .chain(self.roots.iter())
1067 1066 .chain(self.parents.iter());
1068 1067 DirsChildrenMultiset::new(thing, Some(&self.parents))
1069 1068 }
1070 1069
1071 1070 pub fn debug_get_patterns(&self) -> &[u8] {
1072 1071 self.patterns.as_ref()
1073 1072 }
1074 1073 }
1075 1074
1076 1075 impl<'a> Display for IncludeMatcher<'a> {
1077 1076 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
1078 1077 // XXX What about exact matches?
1079 1078 // I'm not sure it's worth it to clone the HashSet and keep it
1080 1079 // around just in case someone wants to display the matcher, plus
1081 1080 // it's going to be unreadable after a few entries, but we need to
1082 1081 // inform in this display that exact matches are being used and are
1083 1082 // (on purpose) missing from the `includes`.
1084 1083 write!(
1085 1084 f,
1086 1085 "IncludeMatcher(includes='{}')",
1087 1086 String::from_utf8_lossy(&self.patterns.escaped_bytes())
1088 1087 )
1089 1088 }
1090 1089 }
1091 1090
1092 1091 #[cfg(test)]
1093 1092 mod tests {
1094 1093 use super::*;
1095 1094 use pretty_assertions::assert_eq;
1096 1095 use std::path::Path;
1097 1096
1098 1097 #[test]
1099 1098 fn test_roots_and_dirs() {
1100 1099 let pats = vec![
1101 1100 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1102 1101 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1103 1102 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1104 1103 ];
1105 1104 let (roots, dirs) = roots_and_dirs(&pats);
1106 1105
1107 1106 assert_eq!(
1108 1107 roots,
1109 1108 vec!(
1110 1109 HgPathBuf::from_bytes(b"g/h"),
1111 1110 HgPathBuf::from_bytes(b"g/h"),
1112 1111 HgPathBuf::new()
1113 1112 ),
1114 1113 );
1115 1114 assert_eq!(dirs, vec!());
1116 1115 }
1117 1116
1118 1117 #[test]
1119 1118 fn test_roots_dirs_and_parents() {
1120 1119 let pats = vec![
1121 1120 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1122 1121 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1123 1122 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1124 1123 ];
1125 1124
1126 1125 let mut roots = HashSet::new();
1127 1126 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1128 1127 roots.insert(HgPathBuf::new());
1129 1128
1130 1129 let dirs = HashSet::new();
1131 1130
1132 1131 let mut parents = HashSet::new();
1133 1132 parents.insert(HgPathBuf::new());
1134 1133 parents.insert(HgPathBuf::from_bytes(b"g"));
1135 1134
1136 1135 assert_eq!(
1137 1136 roots_dirs_and_parents(&pats).unwrap(),
1138 1137 RootsDirsAndParents {
1139 1138 roots,
1140 1139 dirs,
1141 1140 parents
1142 1141 }
1143 1142 );
1144 1143 }
1145 1144
1146 1145 #[test]
1147 1146 fn test_filematcher_visit_children_set() {
1148 1147 // Visitchildrenset
1149 1148 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1150 1149 let matcher = FileMatcher::new(files).unwrap();
1151 1150
1152 1151 let mut set = HashSet::new();
1153 1152 set.insert(HgPathBuf::from_bytes(b"dir"));
1154 1153 assert_eq!(
1155 1154 matcher.visit_children_set(HgPath::new(b"")),
1156 1155 VisitChildrenSet::Set(set)
1157 1156 );
1158 1157
1159 1158 let mut set = HashSet::new();
1160 1159 set.insert(HgPathBuf::from_bytes(b"subdir"));
1161 1160 assert_eq!(
1162 1161 matcher.visit_children_set(HgPath::new(b"dir")),
1163 1162 VisitChildrenSet::Set(set)
1164 1163 );
1165 1164
1166 1165 let mut set = HashSet::new();
1167 1166 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1168 1167 assert_eq!(
1169 1168 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1170 1169 VisitChildrenSet::Set(set)
1171 1170 );
1172 1171
1173 1172 assert_eq!(
1174 1173 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1175 1174 VisitChildrenSet::Empty
1176 1175 );
1177 1176 assert_eq!(
1178 1177 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1179 1178 VisitChildrenSet::Empty
1180 1179 );
1181 1180 assert_eq!(
1182 1181 matcher.visit_children_set(HgPath::new(b"folder")),
1183 1182 VisitChildrenSet::Empty
1184 1183 );
1185 1184 }
1186 1185
1187 1186 #[test]
1188 1187 fn test_filematcher_visit_children_set_files_and_dirs() {
1189 1188 let files = vec![
1190 1189 HgPathBuf::from_bytes(b"rootfile.txt"),
1191 1190 HgPathBuf::from_bytes(b"a/file1.txt"),
1192 1191 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1193 1192 // No file in a/b/c
1194 1193 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1195 1194 ];
1196 1195 let matcher = FileMatcher::new(files).unwrap();
1197 1196
1198 1197 let mut set = HashSet::new();
1199 1198 set.insert(HgPathBuf::from_bytes(b"a"));
1200 1199 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1201 1200 assert_eq!(
1202 1201 matcher.visit_children_set(HgPath::new(b"")),
1203 1202 VisitChildrenSet::Set(set)
1204 1203 );
1205 1204
1206 1205 let mut set = HashSet::new();
1207 1206 set.insert(HgPathBuf::from_bytes(b"b"));
1208 1207 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1209 1208 assert_eq!(
1210 1209 matcher.visit_children_set(HgPath::new(b"a")),
1211 1210 VisitChildrenSet::Set(set)
1212 1211 );
1213 1212
1214 1213 let mut set = HashSet::new();
1215 1214 set.insert(HgPathBuf::from_bytes(b"c"));
1216 1215 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1217 1216 assert_eq!(
1218 1217 matcher.visit_children_set(HgPath::new(b"a/b")),
1219 1218 VisitChildrenSet::Set(set)
1220 1219 );
1221 1220
1222 1221 let mut set = HashSet::new();
1223 1222 set.insert(HgPathBuf::from_bytes(b"d"));
1224 1223 assert_eq!(
1225 1224 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1226 1225 VisitChildrenSet::Set(set)
1227 1226 );
1228 1227 let mut set = HashSet::new();
1229 1228 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1230 1229 assert_eq!(
1231 1230 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1232 1231 VisitChildrenSet::Set(set)
1233 1232 );
1234 1233
1235 1234 assert_eq!(
1236 1235 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1237 1236 VisitChildrenSet::Empty
1238 1237 );
1239 1238 assert_eq!(
1240 1239 matcher.visit_children_set(HgPath::new(b"folder")),
1241 1240 VisitChildrenSet::Empty
1242 1241 );
1243 1242 }
1244 1243
1245 1244 #[test]
1246 1245 fn test_patternmatcher() {
1247 1246 // VisitdirPrefix
1248 1247 let m = PatternMatcher::new(vec![IgnorePattern::new(
1249 1248 PatternSyntax::Path,
1250 1249 b"dir/subdir",
1251 1250 Path::new(""),
1252 1251 )])
1253 1252 .unwrap();
1254 1253 assert_eq!(
1255 1254 m.visit_children_set(HgPath::new(b"")),
1256 1255 VisitChildrenSet::This
1257 1256 );
1258 1257 assert_eq!(
1259 1258 m.visit_children_set(HgPath::new(b"dir")),
1260 1259 VisitChildrenSet::This
1261 1260 );
1262 1261 assert_eq!(
1263 1262 m.visit_children_set(HgPath::new(b"dir/subdir")),
1264 1263 VisitChildrenSet::Recursive
1265 1264 );
1266 1265 // OPT: This should probably be Recursive if its parent is?
1267 1266 assert_eq!(
1268 1267 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1269 1268 VisitChildrenSet::This
1270 1269 );
1271 1270 assert_eq!(
1272 1271 m.visit_children_set(HgPath::new(b"folder")),
1273 1272 VisitChildrenSet::Empty
1274 1273 );
1275 1274
1276 1275 // VisitchildrensetPrefix
1277 1276 let m = PatternMatcher::new(vec![IgnorePattern::new(
1278 1277 PatternSyntax::Path,
1279 1278 b"dir/subdir",
1280 1279 Path::new(""),
1281 1280 )])
1282 1281 .unwrap();
1283 1282 assert_eq!(
1284 1283 m.visit_children_set(HgPath::new(b"")),
1285 1284 VisitChildrenSet::This
1286 1285 );
1287 1286 assert_eq!(
1288 1287 m.visit_children_set(HgPath::new(b"dir")),
1289 1288 VisitChildrenSet::This
1290 1289 );
1291 1290 assert_eq!(
1292 1291 m.visit_children_set(HgPath::new(b"dir/subdir")),
1293 1292 VisitChildrenSet::Recursive
1294 1293 );
1295 1294 // OPT: This should probably be Recursive if its parent is?
1296 1295 assert_eq!(
1297 1296 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1298 1297 VisitChildrenSet::This
1299 1298 );
1300 1299 assert_eq!(
1301 1300 m.visit_children_set(HgPath::new(b"folder")),
1302 1301 VisitChildrenSet::Empty
1303 1302 );
1304 1303
1305 1304 // VisitdirRootfilesin
1306 1305 let m = PatternMatcher::new(vec![IgnorePattern::new(
1307 1306 PatternSyntax::RootFiles,
1308 1307 b"dir/subdir",
1309 1308 Path::new(""),
1310 1309 )])
1311 1310 .unwrap();
1312 1311 assert_eq!(
1313 1312 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1314 1313 VisitChildrenSet::Empty
1315 1314 );
1316 1315 assert_eq!(
1317 1316 m.visit_children_set(HgPath::new(b"folder")),
1318 1317 VisitChildrenSet::Empty
1319 1318 );
1320 1319 // FIXME: These should probably be This.
1321 1320 assert_eq!(
1322 1321 m.visit_children_set(HgPath::new(b"")),
1323 1322 VisitChildrenSet::Empty
1324 1323 );
1325 1324 assert_eq!(
1326 1325 m.visit_children_set(HgPath::new(b"dir")),
1327 1326 VisitChildrenSet::Empty
1328 1327 );
1329 1328 assert_eq!(
1330 1329 m.visit_children_set(HgPath::new(b"dir/subdir")),
1331 1330 VisitChildrenSet::Empty
1332 1331 );
1333 1332
1334 1333 // VisitchildrensetRootfilesin
1335 1334 let m = PatternMatcher::new(vec![IgnorePattern::new(
1336 1335 PatternSyntax::RootFiles,
1337 1336 b"dir/subdir",
1338 1337 Path::new(""),
1339 1338 )])
1340 1339 .unwrap();
1341 1340 assert_eq!(
1342 1341 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1343 1342 VisitChildrenSet::Empty
1344 1343 );
1345 1344 assert_eq!(
1346 1345 m.visit_children_set(HgPath::new(b"folder")),
1347 1346 VisitChildrenSet::Empty
1348 1347 );
1349 1348 // FIXME: These should probably be {'dir'}, {'subdir'} and This,
1350 1349 // respectively, or at least This for all three.
1351 1350 assert_eq!(
1352 1351 m.visit_children_set(HgPath::new(b"")),
1353 1352 VisitChildrenSet::Empty
1354 1353 );
1355 1354 assert_eq!(
1356 1355 m.visit_children_set(HgPath::new(b"dir")),
1357 1356 VisitChildrenSet::Empty
1358 1357 );
1359 1358 assert_eq!(
1360 1359 m.visit_children_set(HgPath::new(b"dir/subdir")),
1361 1360 VisitChildrenSet::Empty
1362 1361 );
1363 1362
1364 1363 // VisitdirGlob
1365 1364 let m = PatternMatcher::new(vec![IgnorePattern::new(
1366 1365 PatternSyntax::Glob,
1367 1366 b"dir/z*",
1368 1367 Path::new(""),
1369 1368 )])
1370 1369 .unwrap();
1371 1370 assert_eq!(
1372 1371 m.visit_children_set(HgPath::new(b"")),
1373 1372 VisitChildrenSet::This
1374 1373 );
1375 1374 // FIXME: This probably should be This
1376 1375 assert_eq!(
1377 1376 m.visit_children_set(HgPath::new(b"dir")),
1378 1377 VisitChildrenSet::Empty
1379 1378 );
1380 1379 assert_eq!(
1381 1380 m.visit_children_set(HgPath::new(b"folder")),
1382 1381 VisitChildrenSet::Empty
1383 1382 );
1384 1383 // OPT: these should probably be Empty.
1385 1384 assert_eq!(
1386 1385 m.visit_children_set(HgPath::new(b"dir/subdir")),
1387 1386 VisitChildrenSet::This
1388 1387 );
1389 1388 assert_eq!(
1390 1389 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1391 1390 VisitChildrenSet::This
1392 1391 );
1393 1392
1394 1393 // VisitchildrensetGlob
1395 1394 let m = PatternMatcher::new(vec![IgnorePattern::new(
1396 1395 PatternSyntax::Glob,
1397 1396 b"dir/z*",
1398 1397 Path::new(""),
1399 1398 )])
1400 1399 .unwrap();
1401 1400 assert_eq!(
1402 1401 m.visit_children_set(HgPath::new(b"")),
1403 1402 VisitChildrenSet::This
1404 1403 );
1405 1404 assert_eq!(
1406 1405 m.visit_children_set(HgPath::new(b"folder")),
1407 1406 VisitChildrenSet::Empty
1408 1407 );
1409 1408 // FIXME: This probably should be This
1410 1409 assert_eq!(
1411 1410 m.visit_children_set(HgPath::new(b"dir")),
1412 1411 VisitChildrenSet::Empty
1413 1412 );
1414 1413 // OPT: these should probably be Empty
1415 1414 assert_eq!(
1416 1415 m.visit_children_set(HgPath::new(b"dir/subdir")),
1417 1416 VisitChildrenSet::This
1418 1417 );
1419 1418 assert_eq!(
1420 1419 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1421 1420 VisitChildrenSet::This
1422 1421 );
1423 1422
1424 1423 // VisitdirFilepath
1425 1424 let m = PatternMatcher::new(vec![IgnorePattern::new(
1426 1425 PatternSyntax::FilePath,
1427 1426 b"dir/z",
1428 1427 Path::new(""),
1429 1428 )])
1430 1429 .unwrap();
1431 1430 assert_eq!(
1432 1431 m.visit_children_set(HgPath::new(b"")),
1433 1432 VisitChildrenSet::This
1434 1433 );
1435 1434 assert_eq!(
1436 1435 m.visit_children_set(HgPath::new(b"dir")),
1437 1436 VisitChildrenSet::This
1438 1437 );
1439 1438 assert_eq!(
1440 1439 m.visit_children_set(HgPath::new(b"folder")),
1441 1440 VisitChildrenSet::Empty
1442 1441 );
1443 1442 assert_eq!(
1444 1443 m.visit_children_set(HgPath::new(b"dir/subdir")),
1445 1444 VisitChildrenSet::Empty
1446 1445 );
1447 1446 assert_eq!(
1448 1447 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1449 1448 VisitChildrenSet::Empty
1450 1449 );
1451 1450
1452 1451 // VisitchildrensetFilepath
1453 1452 let m = PatternMatcher::new(vec![IgnorePattern::new(
1454 1453 PatternSyntax::FilePath,
1455 1454 b"dir/z",
1456 1455 Path::new(""),
1457 1456 )])
1458 1457 .unwrap();
1459 1458 assert_eq!(
1460 1459 m.visit_children_set(HgPath::new(b"")),
1461 1460 VisitChildrenSet::This
1462 1461 );
1463 1462 assert_eq!(
1464 1463 m.visit_children_set(HgPath::new(b"folder")),
1465 1464 VisitChildrenSet::Empty
1466 1465 );
1467 1466 assert_eq!(
1468 1467 m.visit_children_set(HgPath::new(b"dir")),
1469 1468 VisitChildrenSet::This
1470 1469 );
1471 1470 assert_eq!(
1472 1471 m.visit_children_set(HgPath::new(b"dir/subdir")),
1473 1472 VisitChildrenSet::Empty
1474 1473 );
1475 1474 assert_eq!(
1476 1475 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1477 1476 VisitChildrenSet::Empty
1478 1477 );
1479 1478 }
1480 1479
1481 1480 #[test]
1482 1481 fn test_includematcher() {
1483 1482 // VisitchildrensetPrefix
1484 1483 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1485 1484 PatternSyntax::RelPath,
1486 1485 b"dir/subdir",
1487 1486 Path::new(""),
1488 1487 )])
1489 1488 .unwrap();
1490 1489
1491 1490 let mut set = HashSet::new();
1492 1491 set.insert(HgPathBuf::from_bytes(b"dir"));
1493 1492 assert_eq!(
1494 1493 matcher.visit_children_set(HgPath::new(b"")),
1495 1494 VisitChildrenSet::Set(set)
1496 1495 );
1497 1496
1498 1497 let mut set = HashSet::new();
1499 1498 set.insert(HgPathBuf::from_bytes(b"subdir"));
1500 1499 assert_eq!(
1501 1500 matcher.visit_children_set(HgPath::new(b"dir")),
1502 1501 VisitChildrenSet::Set(set)
1503 1502 );
1504 1503 assert_eq!(
1505 1504 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1506 1505 VisitChildrenSet::Recursive
1507 1506 );
1508 1507 // OPT: This should probably be 'all' if its parent is?
1509 1508 assert_eq!(
1510 1509 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1511 1510 VisitChildrenSet::This
1512 1511 );
1513 1512 assert_eq!(
1514 1513 matcher.visit_children_set(HgPath::new(b"folder")),
1515 1514 VisitChildrenSet::Empty
1516 1515 );
1517 1516
1518 1517 // VisitchildrensetRootfilesin
1519 1518 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1520 1519 PatternSyntax::RootFiles,
1521 1520 b"dir/subdir",
1522 1521 Path::new(""),
1523 1522 )])
1524 1523 .unwrap();
1525 1524
1526 1525 let mut set = HashSet::new();
1527 1526 set.insert(HgPathBuf::from_bytes(b"dir"));
1528 1527 assert_eq!(
1529 1528 matcher.visit_children_set(HgPath::new(b"")),
1530 1529 VisitChildrenSet::Set(set)
1531 1530 );
1532 1531
1533 1532 let mut set = HashSet::new();
1534 1533 set.insert(HgPathBuf::from_bytes(b"subdir"));
1535 1534 assert_eq!(
1536 1535 matcher.visit_children_set(HgPath::new(b"dir")),
1537 1536 VisitChildrenSet::Set(set)
1538 1537 );
1539 1538
1540 1539 assert_eq!(
1541 1540 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1542 1541 VisitChildrenSet::This
1543 1542 );
1544 1543 assert_eq!(
1545 1544 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1546 1545 VisitChildrenSet::Empty
1547 1546 );
1548 1547 assert_eq!(
1549 1548 matcher.visit_children_set(HgPath::new(b"folder")),
1550 1549 VisitChildrenSet::Empty
1551 1550 );
1552 1551
1553 1552 // VisitchildrensetGlob
1554 1553 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1555 1554 PatternSyntax::Glob,
1556 1555 b"dir/z*",
1557 1556 Path::new(""),
1558 1557 )])
1559 1558 .unwrap();
1560 1559
1561 1560 let mut set = HashSet::new();
1562 1561 set.insert(HgPathBuf::from_bytes(b"dir"));
1563 1562 assert_eq!(
1564 1563 matcher.visit_children_set(HgPath::new(b"")),
1565 1564 VisitChildrenSet::Set(set)
1566 1565 );
1567 1566 assert_eq!(
1568 1567 matcher.visit_children_set(HgPath::new(b"folder")),
1569 1568 VisitChildrenSet::Empty
1570 1569 );
1571 1570 assert_eq!(
1572 1571 matcher.visit_children_set(HgPath::new(b"dir")),
1573 1572 VisitChildrenSet::This
1574 1573 );
1575 1574 // OPT: these should probably be set().
1576 1575 assert_eq!(
1577 1576 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1578 1577 VisitChildrenSet::This
1579 1578 );
1580 1579 assert_eq!(
1581 1580 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1582 1581 VisitChildrenSet::This
1583 1582 );
1584 1583
1585 1584 // VisitchildrensetFilePath
1586 1585 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1587 1586 PatternSyntax::FilePath,
1588 1587 b"dir/z",
1589 1588 Path::new(""),
1590 1589 )])
1591 1590 .unwrap();
1592 1591
1593 1592 let mut set = HashSet::new();
1594 1593 set.insert(HgPathBuf::from_bytes(b"dir"));
1595 1594 assert_eq!(
1596 1595 matcher.visit_children_set(HgPath::new(b"")),
1597 1596 VisitChildrenSet::Set(set)
1598 1597 );
1599 1598 assert_eq!(
1600 1599 matcher.visit_children_set(HgPath::new(b"folder")),
1601 1600 VisitChildrenSet::Empty
1602 1601 );
1603 1602 let mut set = HashSet::new();
1604 1603 set.insert(HgPathBuf::from_bytes(b"z"));
1605 1604 assert_eq!(
1606 1605 matcher.visit_children_set(HgPath::new(b"dir")),
1607 1606 VisitChildrenSet::Set(set)
1608 1607 );
1609 1608 // OPT: these should probably be set().
1610 1609 assert_eq!(
1611 1610 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1612 1611 VisitChildrenSet::Empty
1613 1612 );
1614 1613 assert_eq!(
1615 1614 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1616 1615 VisitChildrenSet::Empty
1617 1616 );
1618 1617
1619 1618 // Test multiple patterns
1620 1619 let matcher = IncludeMatcher::new(vec![
1621 1620 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1622 1621 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1623 1622 ])
1624 1623 .unwrap();
1625 1624
1626 1625 assert_eq!(
1627 1626 matcher.visit_children_set(HgPath::new(b"")),
1628 1627 VisitChildrenSet::This
1629 1628 );
1630 1629
1631 1630 // Test multiple patterns
1632 1631 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1633 1632 PatternSyntax::Glob,
1634 1633 b"**/*.exe",
1635 1634 Path::new(""),
1636 1635 )])
1637 1636 .unwrap();
1638 1637
1639 1638 assert_eq!(
1640 1639 matcher.visit_children_set(HgPath::new(b"")),
1641 1640 VisitChildrenSet::This
1642 1641 );
1643 1642 }
1644 1643
1645 1644 #[test]
1646 1645 fn test_unionmatcher() {
1647 1646 // Path + Rootfiles
1648 1647 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1649 1648 PatternSyntax::RelPath,
1650 1649 b"dir/subdir",
1651 1650 Path::new(""),
1652 1651 )])
1653 1652 .unwrap();
1654 1653 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1655 1654 PatternSyntax::RootFiles,
1656 1655 b"dir",
1657 1656 Path::new(""),
1658 1657 )])
1659 1658 .unwrap();
1660 1659 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1661 1660
1662 1661 let mut set = HashSet::new();
1663 1662 set.insert(HgPathBuf::from_bytes(b"dir"));
1664 1663 assert_eq!(
1665 1664 matcher.visit_children_set(HgPath::new(b"")),
1666 1665 VisitChildrenSet::Set(set)
1667 1666 );
1668 1667 assert_eq!(
1669 1668 matcher.visit_children_set(HgPath::new(b"dir")),
1670 1669 VisitChildrenSet::This
1671 1670 );
1672 1671 assert_eq!(
1673 1672 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1674 1673 VisitChildrenSet::Recursive
1675 1674 );
1676 1675 assert_eq!(
1677 1676 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1678 1677 VisitChildrenSet::Empty
1679 1678 );
1680 1679 assert_eq!(
1681 1680 matcher.visit_children_set(HgPath::new(b"folder")),
1682 1681 VisitChildrenSet::Empty
1683 1682 );
1684 1683 assert_eq!(
1685 1684 matcher.visit_children_set(HgPath::new(b"folder")),
1686 1685 VisitChildrenSet::Empty
1687 1686 );
1688 1687
1689 1688 // OPT: These next two could be 'all' instead of 'this'.
1690 1689 assert_eq!(
1691 1690 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1692 1691 VisitChildrenSet::This
1693 1692 );
1694 1693 assert_eq!(
1695 1694 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1696 1695 VisitChildrenSet::This
1697 1696 );
1698 1697
1699 1698 // Path + unrelated Path
1700 1699 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1701 1700 PatternSyntax::RelPath,
1702 1701 b"dir/subdir",
1703 1702 Path::new(""),
1704 1703 )])
1705 1704 .unwrap();
1706 1705 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1707 1706 PatternSyntax::RelPath,
1708 1707 b"folder",
1709 1708 Path::new(""),
1710 1709 )])
1711 1710 .unwrap();
1712 1711 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1713 1712
1714 1713 let mut set = HashSet::new();
1715 1714 set.insert(HgPathBuf::from_bytes(b"folder"));
1716 1715 set.insert(HgPathBuf::from_bytes(b"dir"));
1717 1716 assert_eq!(
1718 1717 matcher.visit_children_set(HgPath::new(b"")),
1719 1718 VisitChildrenSet::Set(set)
1720 1719 );
1721 1720 let mut set = HashSet::new();
1722 1721 set.insert(HgPathBuf::from_bytes(b"subdir"));
1723 1722 assert_eq!(
1724 1723 matcher.visit_children_set(HgPath::new(b"dir")),
1725 1724 VisitChildrenSet::Set(set)
1726 1725 );
1727 1726
1728 1727 assert_eq!(
1729 1728 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1730 1729 VisitChildrenSet::Recursive
1731 1730 );
1732 1731 assert_eq!(
1733 1732 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1734 1733 VisitChildrenSet::Empty
1735 1734 );
1736 1735
1737 1736 assert_eq!(
1738 1737 matcher.visit_children_set(HgPath::new(b"folder")),
1739 1738 VisitChildrenSet::Recursive
1740 1739 );
1741 1740 // OPT: These next two could be 'all' instead of 'this'.
1742 1741 assert_eq!(
1743 1742 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1744 1743 VisitChildrenSet::This
1745 1744 );
1746 1745 assert_eq!(
1747 1746 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1748 1747 VisitChildrenSet::This
1749 1748 );
1750 1749
1751 1750 // Path + subpath
1752 1751 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1753 1752 PatternSyntax::RelPath,
1754 1753 b"dir/subdir/x",
1755 1754 Path::new(""),
1756 1755 )])
1757 1756 .unwrap();
1758 1757 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1759 1758 PatternSyntax::RelPath,
1760 1759 b"dir/subdir",
1761 1760 Path::new(""),
1762 1761 )])
1763 1762 .unwrap();
1764 1763 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1765 1764
1766 1765 let mut set = HashSet::new();
1767 1766 set.insert(HgPathBuf::from_bytes(b"dir"));
1768 1767 assert_eq!(
1769 1768 matcher.visit_children_set(HgPath::new(b"")),
1770 1769 VisitChildrenSet::Set(set)
1771 1770 );
1772 1771 let mut set = HashSet::new();
1773 1772 set.insert(HgPathBuf::from_bytes(b"subdir"));
1774 1773 assert_eq!(
1775 1774 matcher.visit_children_set(HgPath::new(b"dir")),
1776 1775 VisitChildrenSet::Set(set)
1777 1776 );
1778 1777
1779 1778 assert_eq!(
1780 1779 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1781 1780 VisitChildrenSet::Recursive
1782 1781 );
1783 1782 assert_eq!(
1784 1783 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1785 1784 VisitChildrenSet::Empty
1786 1785 );
1787 1786
1788 1787 assert_eq!(
1789 1788 matcher.visit_children_set(HgPath::new(b"folder")),
1790 1789 VisitChildrenSet::Empty
1791 1790 );
1792 1791 assert_eq!(
1793 1792 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1794 1793 VisitChildrenSet::Recursive
1795 1794 );
1796 1795 // OPT: this should probably be 'all' not 'this'.
1797 1796 assert_eq!(
1798 1797 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1799 1798 VisitChildrenSet::This
1800 1799 );
1801 1800 }
1802 1801
1803 1802 #[test]
1804 1803 fn test_intersectionmatcher() {
1805 1804 // Include path + Include rootfiles
1806 1805 let m1 = Box::new(
1807 1806 IncludeMatcher::new(vec![IgnorePattern::new(
1808 1807 PatternSyntax::RelPath,
1809 1808 b"dir/subdir",
1810 1809 Path::new(""),
1811 1810 )])
1812 1811 .unwrap(),
1813 1812 );
1814 1813 let m2 = Box::new(
1815 1814 IncludeMatcher::new(vec![IgnorePattern::new(
1816 1815 PatternSyntax::RootFiles,
1817 1816 b"dir",
1818 1817 Path::new(""),
1819 1818 )])
1820 1819 .unwrap(),
1821 1820 );
1822 1821 let matcher = IntersectionMatcher::new(m1, m2);
1823 1822
1824 1823 let mut set = HashSet::new();
1825 1824 set.insert(HgPathBuf::from_bytes(b"dir"));
1826 1825 assert_eq!(
1827 1826 matcher.visit_children_set(HgPath::new(b"")),
1828 1827 VisitChildrenSet::Set(set)
1829 1828 );
1830 1829 assert_eq!(
1831 1830 matcher.visit_children_set(HgPath::new(b"dir")),
1832 1831 VisitChildrenSet::This
1833 1832 );
1834 1833 assert_eq!(
1835 1834 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1836 1835 VisitChildrenSet::Empty
1837 1836 );
1838 1837 assert_eq!(
1839 1838 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1840 1839 VisitChildrenSet::Empty
1841 1840 );
1842 1841 assert_eq!(
1843 1842 matcher.visit_children_set(HgPath::new(b"folder")),
1844 1843 VisitChildrenSet::Empty
1845 1844 );
1846 1845 assert_eq!(
1847 1846 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1848 1847 VisitChildrenSet::Empty
1849 1848 );
1850 1849 assert_eq!(
1851 1850 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1852 1851 VisitChildrenSet::Empty
1853 1852 );
1854 1853
1855 1854 // Non intersecting paths
1856 1855 let m1 = Box::new(
1857 1856 IncludeMatcher::new(vec![IgnorePattern::new(
1858 1857 PatternSyntax::RelPath,
1859 1858 b"dir/subdir",
1860 1859 Path::new(""),
1861 1860 )])
1862 1861 .unwrap(),
1863 1862 );
1864 1863 let m2 = Box::new(
1865 1864 IncludeMatcher::new(vec![IgnorePattern::new(
1866 1865 PatternSyntax::RelPath,
1867 1866 b"folder",
1868 1867 Path::new(""),
1869 1868 )])
1870 1869 .unwrap(),
1871 1870 );
1872 1871 let matcher = IntersectionMatcher::new(m1, m2);
1873 1872
1874 1873 assert_eq!(
1875 1874 matcher.visit_children_set(HgPath::new(b"")),
1876 1875 VisitChildrenSet::Empty
1877 1876 );
1878 1877 assert_eq!(
1879 1878 matcher.visit_children_set(HgPath::new(b"dir")),
1880 1879 VisitChildrenSet::Empty
1881 1880 );
1882 1881 assert_eq!(
1883 1882 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1884 1883 VisitChildrenSet::Empty
1885 1884 );
1886 1885 assert_eq!(
1887 1886 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1888 1887 VisitChildrenSet::Empty
1889 1888 );
1890 1889 assert_eq!(
1891 1890 matcher.visit_children_set(HgPath::new(b"folder")),
1892 1891 VisitChildrenSet::Empty
1893 1892 );
1894 1893 assert_eq!(
1895 1894 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1896 1895 VisitChildrenSet::Empty
1897 1896 );
1898 1897 assert_eq!(
1899 1898 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1900 1899 VisitChildrenSet::Empty
1901 1900 );
1902 1901
1903 1902 // Nested paths
1904 1903 let m1 = Box::new(
1905 1904 IncludeMatcher::new(vec![IgnorePattern::new(
1906 1905 PatternSyntax::RelPath,
1907 1906 b"dir/subdir/x",
1908 1907 Path::new(""),
1909 1908 )])
1910 1909 .unwrap(),
1911 1910 );
1912 1911 let m2 = Box::new(
1913 1912 IncludeMatcher::new(vec![IgnorePattern::new(
1914 1913 PatternSyntax::RelPath,
1915 1914 b"dir/subdir",
1916 1915 Path::new(""),
1917 1916 )])
1918 1917 .unwrap(),
1919 1918 );
1920 1919 let matcher = IntersectionMatcher::new(m1, m2);
1921 1920
1922 1921 let mut set = HashSet::new();
1923 1922 set.insert(HgPathBuf::from_bytes(b"dir"));
1924 1923 assert_eq!(
1925 1924 matcher.visit_children_set(HgPath::new(b"")),
1926 1925 VisitChildrenSet::Set(set)
1927 1926 );
1928 1927
1929 1928 let mut set = HashSet::new();
1930 1929 set.insert(HgPathBuf::from_bytes(b"subdir"));
1931 1930 assert_eq!(
1932 1931 matcher.visit_children_set(HgPath::new(b"dir")),
1933 1932 VisitChildrenSet::Set(set)
1934 1933 );
1935 1934 let mut set = HashSet::new();
1936 1935 set.insert(HgPathBuf::from_bytes(b"x"));
1937 1936 assert_eq!(
1938 1937 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1939 1938 VisitChildrenSet::Set(set)
1940 1939 );
1941 1940 assert_eq!(
1942 1941 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1943 1942 VisitChildrenSet::Empty
1944 1943 );
1945 1944 assert_eq!(
1946 1945 matcher.visit_children_set(HgPath::new(b"folder")),
1947 1946 VisitChildrenSet::Empty
1948 1947 );
1949 1948 assert_eq!(
1950 1949 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1951 1950 VisitChildrenSet::Empty
1952 1951 );
1953 1952 // OPT: this should probably be 'all' not 'this'.
1954 1953 assert_eq!(
1955 1954 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1956 1955 VisitChildrenSet::This
1957 1956 );
1958 1957
1959 1958 // Diverging paths
1960 1959 let m1 = Box::new(
1961 1960 IncludeMatcher::new(vec![IgnorePattern::new(
1962 1961 PatternSyntax::RelPath,
1963 1962 b"dir/subdir/x",
1964 1963 Path::new(""),
1965 1964 )])
1966 1965 .unwrap(),
1967 1966 );
1968 1967 let m2 = Box::new(
1969 1968 IncludeMatcher::new(vec![IgnorePattern::new(
1970 1969 PatternSyntax::RelPath,
1971 1970 b"dir/subdir/z",
1972 1971 Path::new(""),
1973 1972 )])
1974 1973 .unwrap(),
1975 1974 );
1976 1975 let matcher = IntersectionMatcher::new(m1, m2);
1977 1976
1978 1977 // OPT: these next two could probably be Empty as well.
1979 1978 let mut set = HashSet::new();
1980 1979 set.insert(HgPathBuf::from_bytes(b"dir"));
1981 1980 assert_eq!(
1982 1981 matcher.visit_children_set(HgPath::new(b"")),
1983 1982 VisitChildrenSet::Set(set)
1984 1983 );
1985 1984 // OPT: these next two could probably be Empty as well.
1986 1985 let mut set = HashSet::new();
1987 1986 set.insert(HgPathBuf::from_bytes(b"subdir"));
1988 1987 assert_eq!(
1989 1988 matcher.visit_children_set(HgPath::new(b"dir")),
1990 1989 VisitChildrenSet::Set(set)
1991 1990 );
1992 1991 assert_eq!(
1993 1992 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1994 1993 VisitChildrenSet::Empty
1995 1994 );
1996 1995 assert_eq!(
1997 1996 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1998 1997 VisitChildrenSet::Empty
1999 1998 );
2000 1999 assert_eq!(
2001 2000 matcher.visit_children_set(HgPath::new(b"folder")),
2002 2001 VisitChildrenSet::Empty
2003 2002 );
2004 2003 assert_eq!(
2005 2004 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2006 2005 VisitChildrenSet::Empty
2007 2006 );
2008 2007 assert_eq!(
2009 2008 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2010 2009 VisitChildrenSet::Empty
2011 2010 );
2012 2011 }
2013 2012
2014 2013 #[test]
2015 2014 fn test_differencematcher() {
2016 2015 // Two alwaysmatchers should function like a nevermatcher
2017 2016 let m1 = AlwaysMatcher;
2018 2017 let m2 = AlwaysMatcher;
2019 2018 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2020 2019
2021 2020 for case in &[
2022 2021 &b""[..],
2023 2022 b"dir",
2024 2023 b"dir/subdir",
2025 2024 b"dir/subdir/z",
2026 2025 b"dir/foo",
2027 2026 b"dir/subdir/x",
2028 2027 b"folder",
2029 2028 ] {
2030 2029 assert_eq!(
2031 2030 matcher.visit_children_set(HgPath::new(case)),
2032 2031 VisitChildrenSet::Empty
2033 2032 );
2034 2033 }
2035 2034
2036 2035 // One always and one never should behave the same as an always
2037 2036 let m1 = AlwaysMatcher;
2038 2037 let m2 = NeverMatcher;
2039 2038 let matcher = DifferenceMatcher::new(Box::new(m1), Box::new(m2));
2040 2039
2041 2040 for case in &[
2042 2041 &b""[..],
2043 2042 b"dir",
2044 2043 b"dir/subdir",
2045 2044 b"dir/subdir/z",
2046 2045 b"dir/foo",
2047 2046 b"dir/subdir/x",
2048 2047 b"folder",
2049 2048 ] {
2050 2049 assert_eq!(
2051 2050 matcher.visit_children_set(HgPath::new(case)),
2052 2051 VisitChildrenSet::Recursive
2053 2052 );
2054 2053 }
2055 2054
2056 2055 // Two include matchers
2057 2056 let m1 = Box::new(
2058 2057 IncludeMatcher::new(vec![IgnorePattern::new(
2059 2058 PatternSyntax::RelPath,
2060 2059 b"dir/subdir",
2061 2060 Path::new("/repo"),
2062 2061 )])
2063 2062 .unwrap(),
2064 2063 );
2065 2064 let m2 = Box::new(
2066 2065 IncludeMatcher::new(vec![IgnorePattern::new(
2067 2066 PatternSyntax::RootFiles,
2068 2067 b"dir",
2069 2068 Path::new("/repo"),
2070 2069 )])
2071 2070 .unwrap(),
2072 2071 );
2073 2072
2074 2073 let matcher = DifferenceMatcher::new(m1, m2);
2075 2074
2076 2075 let mut set = HashSet::new();
2077 2076 set.insert(HgPathBuf::from_bytes(b"dir"));
2078 2077 assert_eq!(
2079 2078 matcher.visit_children_set(HgPath::new(b"")),
2080 2079 VisitChildrenSet::Set(set)
2081 2080 );
2082 2081
2083 2082 let mut set = HashSet::new();
2084 2083 set.insert(HgPathBuf::from_bytes(b"subdir"));
2085 2084 assert_eq!(
2086 2085 matcher.visit_children_set(HgPath::new(b"dir")),
2087 2086 VisitChildrenSet::Set(set)
2088 2087 );
2089 2088 assert_eq!(
2090 2089 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
2091 2090 VisitChildrenSet::Recursive
2092 2091 );
2093 2092 assert_eq!(
2094 2093 matcher.visit_children_set(HgPath::new(b"dir/foo")),
2095 2094 VisitChildrenSet::Empty
2096 2095 );
2097 2096 assert_eq!(
2098 2097 matcher.visit_children_set(HgPath::new(b"folder")),
2099 2098 VisitChildrenSet::Empty
2100 2099 );
2101 2100 assert_eq!(
2102 2101 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
2103 2102 VisitChildrenSet::This
2104 2103 );
2105 2104 assert_eq!(
2106 2105 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
2107 2106 VisitChildrenSet::This
2108 2107 );
2109 2108 }
2110 2109 }
@@ -1,782 +1,782 b''
1 1 use crate::changelog::Changelog;
2 2 use crate::config::{Config, ConfigError, ConfigParseError};
3 3 use crate::dirstate::DirstateParents;
4 4 use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
5 5 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
6 6 use crate::dirstate_tree::owning::OwningDirstateMap;
7 7 use crate::errors::HgResultExt;
8 8 use crate::errors::{HgError, IoResultExt};
9 9 use crate::lock::{try_with_lock_no_wait, LockError};
10 10 use crate::manifest::{Manifest, Manifestlog};
11 11 use crate::revlog::filelog::Filelog;
12 12 use crate::revlog::RevlogError;
13 13 use crate::utils::debug::debug_wait_for_file_or_print;
14 14 use crate::utils::files::get_path_from_bytes;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::utils::SliceExt;
17 17 use crate::vfs::{is_dir, is_file, Vfs};
18 18 use crate::DirstateError;
19 19 use crate::{requirements, NodePrefix, UncheckedRevision};
20 20 use std::cell::{Ref, RefCell, RefMut};
21 21 use std::collections::HashSet;
22 22 use std::io::Seek;
23 23 use std::io::SeekFrom;
24 24 use std::io::Write as IoWrite;
25 25 use std::path::{Path, PathBuf};
26 26
27 27 const V2_MAX_READ_ATTEMPTS: usize = 5;
28 28
29 29 type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
30 30
31 31 /// A repository on disk
32 32 pub struct Repo {
33 33 working_directory: PathBuf,
34 34 dot_hg: PathBuf,
35 35 store: PathBuf,
36 36 requirements: HashSet<String>,
37 37 config: Config,
38 38 dirstate_parents: LazyCell<DirstateParents>,
39 39 dirstate_map: LazyCell<OwningDirstateMap>,
40 40 changelog: LazyCell<Changelog>,
41 41 manifestlog: LazyCell<Manifestlog>,
42 42 }
43 43
44 44 #[derive(Debug, derive_more::From)]
45 45 pub enum RepoError {
46 46 NotFound {
47 47 at: PathBuf,
48 48 },
49 49 #[from]
50 50 ConfigParseError(ConfigParseError),
51 51 #[from]
52 52 Other(HgError),
53 53 }
54 54
55 55 impl From<ConfigError> for RepoError {
56 56 fn from(error: ConfigError) -> Self {
57 57 match error {
58 58 ConfigError::Parse(error) => error.into(),
59 59 ConfigError::Other(error) => error.into(),
60 60 }
61 61 }
62 62 }
63 63
64 64 impl Repo {
65 65 /// Tries to find the nearest repository root in the current working
66 66 /// directory or its ancestors.
67 67 pub fn find_repo_root() -> Result<PathBuf, RepoError> {
68 68 let current_directory = crate::utils::current_dir()?;
69 69 // ancestors() is inclusive: it first yields `current_directory`
70 70 // as-is.
71 71 for ancestor in current_directory.ancestors() {
72 72 if is_dir(ancestor.join(".hg"))? {
73 73 return Ok(ancestor.to_path_buf());
74 74 }
75 75 }
76 76 Err(RepoError::NotFound {
77 77 at: current_directory,
78 78 })
79 79 }
80 80
81 81 /// Find a repository, either at the given path (which must contain a `.hg`
82 82 /// sub-directory) or by searching the current directory and its
83 83 /// ancestors.
84 84 ///
85 85 /// A method with two very different "modes" like this is usually a code
86 86 /// smell that calls for two methods, but in this case an `Option` is what rhg
87 87 /// sub-commands get from Clap for the `-R` / `--repository` CLI argument.
88 88 /// Having two methods would just move that `if` to almost all callers.
89 89 pub fn find(
90 90 config: &Config,
91 91 explicit_path: Option<PathBuf>,
92 92 ) -> Result<Self, RepoError> {
93 93 if let Some(root) = explicit_path {
94 94 if is_dir(root.join(".hg"))? {
95 95 Self::new_at_path(root, config)
96 96 } else if is_file(&root)? {
97 97 Err(HgError::unsupported("bundle repository").into())
98 98 } else {
99 99 Err(RepoError::NotFound { at: root })
100 100 }
101 101 } else {
102 102 let root = Self::find_repo_root()?;
103 103 Self::new_at_path(root, config)
104 104 }
105 105 }
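    // --- Editor's illustrative sketch (not part of the original file) ---
    // Hedged restatement of the `find` contract above: callers pass
    // `Some(path)` only when the user gave `-R`/`--repository`, otherwise
    // `None` walks up from the current directory via `find_repo_root`.
    // This helper is hypothetical and only shows the call shape.
    #[cfg(test)]
    fn _find_sketch(
        config: &Config,
        cli_repo_arg: Option<PathBuf>,
    ) -> Result<Repo, RepoError> {
        Repo::find(config, cli_repo_arg)
    }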
106 106
107 107 /// To be called after checking that `.hg` is a sub-directory
108 108 fn new_at_path(
109 109 working_directory: PathBuf,
110 110 config: &Config,
111 111 ) -> Result<Self, RepoError> {
112 112 let dot_hg = working_directory.join(".hg");
113 113
114 114 let mut repo_config_files =
115 115 vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];
116 116
117 117 let hg_vfs = Vfs { base: &dot_hg };
118 118 let mut reqs = requirements::load_if_exists(hg_vfs)?;
119 119 let relative =
120 120 reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
121 121 let shared =
122 122 reqs.contains(requirements::SHARED_REQUIREMENT) || relative;
123 123
124 124 // From `mercurial/localrepo.py`:
125 125 //
126 126 // if .hg/requires contains the sharesafe requirement, it means
127 127 // there exists a `.hg/store/requires` too and we should read it
128 128 // NOTE: presence of SHARESAFE_REQUIREMENT implies that the store requirement
129 129 // is present. We never write SHARESAFE_REQUIREMENT for a repo if the store
130 130 // is not present; refer to checkrequirementscompat() for that
131 131 //
132 132 // However, if SHARESAFE_REQUIREMENT is not present, it means that the
133 133 // repository was shared the old way. We check the share source
134 134 // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
135 135 // current repository needs to be reshared
136 136 let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);
137 137
138 138 let store_path;
139 139 if !shared {
140 140 store_path = dot_hg.join("store");
141 141 } else {
142 142 let bytes = hg_vfs.read("sharedpath")?;
143 143 let mut shared_path =
144 144 get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
145 145 .to_owned();
146 146 if relative {
147 147 shared_path = dot_hg.join(shared_path)
148 148 }
149 149 if !is_dir(&shared_path)? {
150 150 return Err(HgError::corrupted(format!(
151 151 ".hg/sharedpath points to nonexistent directory {}",
152 152 shared_path.display()
153 153 ))
154 154 .into());
155 155 }
156 156
157 157 store_path = shared_path.join("store");
158 158
159 159 let source_is_share_safe =
160 160 requirements::load(Vfs { base: &shared_path })?
161 161 .contains(requirements::SHARESAFE_REQUIREMENT);
162 162
163 163 if share_safe != source_is_share_safe {
164 164 return Err(HgError::unsupported("share-safe mismatch").into());
165 165 }
166 166
167 167 if share_safe {
168 168 repo_config_files.insert(0, shared_path.join("hgrc"))
169 169 }
170 170 }
171 171 if share_safe {
172 172 reqs.extend(requirements::load(Vfs { base: &store_path })?);
173 173 }
174 174
175 175 let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
176 176 config.combine_with_repo(&repo_config_files)?
177 177 } else {
178 178 config.clone()
179 179 };
180 180
181 181 let repo = Self {
182 182 requirements: reqs,
183 183 working_directory,
184 184 store: store_path,
185 185 dot_hg,
186 186 config: repo_config,
187 187 dirstate_parents: LazyCell::new(),
188 188 dirstate_map: LazyCell::new(),
189 189 changelog: LazyCell::new(),
190 190 manifestlog: LazyCell::new(),
191 191 };
192 192
193 193 requirements::check(&repo)?;
194 194
195 195 Ok(repo)
196 196 }
197 197
198 198 pub fn working_directory_path(&self) -> &Path {
199 199 &self.working_directory
200 200 }
201 201
202 202 pub fn requirements(&self) -> &HashSet<String> {
203 203 &self.requirements
204 204 }
205 205
206 206 pub fn config(&self) -> &Config {
207 207 &self.config
208 208 }
209 209
210 210 /// For accessing repository files (in `.hg`), except for the store
211 211 /// (`.hg/store`).
212 212 pub fn hg_vfs(&self) -> Vfs<'_> {
213 213 Vfs { base: &self.dot_hg }
214 214 }
215 215
216 216 /// For accessing repository store files (in `.hg/store`)
217 217 pub fn store_vfs(&self) -> Vfs<'_> {
218 218 Vfs { base: &self.store }
219 219 }
220 220
221 221 /// For accessing the working copy
222 222 pub fn working_directory_vfs(&self) -> Vfs<'_> {
223 223 Vfs {
224 224 base: &self.working_directory,
225 225 }
226 226 }
227 227
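    /// Run `f` while holding the working-copy lock ("wlock"), without
    /// waiting if the lock is already held by another process.
    ///
    /// An illustrative sketch (not from the original source) of how a
    /// caller might use it; `repo` is assumed to be a `Repo`:
    ///
    /// ```ignore
    /// let inner = repo.try_with_wlock_no_wait(|| {
    ///     // ... mutate the dirstate map here, then persist it:
    ///     repo.write_dirstate()
    /// })?; // the outer `Result` reports lock acquisition failures
    /// inner?; // the inner `Result` comes from the closure itself
    /// ```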
228 228 pub fn try_with_wlock_no_wait<R>(
229 229 &self,
230 230 f: impl FnOnce() -> R,
231 231 ) -> Result<R, LockError> {
232 232 try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
233 233 }
234 234
235 235 /// Whether this repo should use dirstate-v2.
236 236 /// The presence of `dirstate-v2` in the requirements does not mean that
237 237 /// the on-disk dirstate is necessarily in version 2. In most cases,
238 238 /// a dirstate-v2 file will indeed be found, but in rare cases (like the
239 239 /// upgrade mechanism being cut short), the on-disk version will be a
240 240 /// v1 file.
241 241 /// Semantically, having a requirement only means that a client cannot
242 242 /// properly understand or properly update the repo if it lacks the support
243 243 /// for the required feature, but not that that feature is actually used
244 244 /// on all occasions.
245 245 pub fn use_dirstate_v2(&self) -> bool {
246 246 self.requirements
247 247 .contains(requirements::DIRSTATE_V2_REQUIREMENT)
248 248 }
249 249
250 250 pub fn has_sparse(&self) -> bool {
251 251 self.requirements.contains(requirements::SPARSE_REQUIREMENT)
252 252 }
253 253
254 254 pub fn has_narrow(&self) -> bool {
255 255 self.requirements.contains(requirements::NARROW_REQUIREMENT)
256 256 }
257 257
258 258 pub fn has_nodemap(&self) -> bool {
259 259 self.requirements
260 260 .contains(requirements::NODEMAP_REQUIREMENT)
261 261 }
262 262
263 263 fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
264 264 Ok(self
265 265 .hg_vfs()
266 266 .read("dirstate")
267 267 .io_not_found_as_none()?
268 268 .unwrap_or_default())
269 269 }
270 270
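    /// An identity token for the on-disk dirstate file, used to detect
    /// whether it was replaced since we last read it. (Note added for
    /// clarity, derived from the code below: on Unix this is the file's
    /// inode number.)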
271 271 fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
272 272 use std::os::unix::fs::MetadataExt;
273 273 Ok(self
274 274 .hg_vfs()
275 275 .symlink_metadata("dirstate")
276 276 .io_not_found_as_none()?
277 277 .map(|meta| meta.ino()))
278 278 }
279 279
280 280 pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
281 281 Ok(*self
282 282 .dirstate_parents
283 283 .get_or_init(|| self.read_dirstate_parents())?)
284 284 }
285 285
286 286 fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
287 287 let dirstate = self.dirstate_file_contents()?;
288 288 let parents = if dirstate.is_empty() {
289 289 DirstateParents::NULL
290 290 } else if self.use_dirstate_v2() {
291 291 let docket_res =
292 292 crate::dirstate_tree::on_disk::read_docket(&dirstate);
293 293 match docket_res {
294 294 Ok(docket) => docket.parents(),
295 295 Err(_) => {
296 296 log::info!(
297 297 "Parsing dirstate docket failed, \
298 298 falling back to dirstate-v1"
299 299 );
300 300 *crate::dirstate::parsers::parse_dirstate_parents(
301 301 &dirstate,
302 302 )?
303 303 }
304 304 }
305 305 } else {
306 306 *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
307 307 };
308 308 self.dirstate_parents.set(parents);
309 309 Ok(parents)
310 310 }
311 311
312 312 /// Returns the information read from the dirstate docket necessary to
313 313 /// check if the data file has been updated/deleted by another process
314 314 /// since we last read the dirstate.
315 315 /// Namely, the inode, data file uuid and the data size.
316 316 fn get_dirstate_data_file_integrity(
317 317 &self,
318 318 ) -> Result<DirstateMapIdentity, HgError> {
319 319 assert!(
320 320 self.use_dirstate_v2(),
321 321 "accessing dirstate data file ID without dirstate-v2"
322 322 );
323 323 // Get the identity before the contents since we could have a race
324 324 // between the two. Having an identity that is too old is fine, but
325 325 // one that is younger than the content change is bad.
326 326 let identity = self.dirstate_identity()?;
327 327 let dirstate = self.dirstate_file_contents()?;
328 328 if dirstate.is_empty() {
329 329 self.dirstate_parents.set(DirstateParents::NULL);
330 330 Ok((identity, None, 0))
331 331 } else {
332 332 let docket_res =
333 333 crate::dirstate_tree::on_disk::read_docket(&dirstate);
334 334 match docket_res {
335 335 Ok(docket) => {
336 336 self.dirstate_parents.set(docket.parents());
337 337 Ok((
338 338 identity,
339 339 Some(docket.uuid.to_owned()),
340 340 docket.data_size(),
341 341 ))
342 342 }
343 343 Err(_) => {
344 344 log::info!(
345 345 "Parsing dirstate docket failed, \
346 346 falling back to dirstate-v1"
347 347 );
348 348 let parents =
349 349 *crate::dirstate::parsers::parse_dirstate_parents(
350 350 &dirstate,
351 351 )?;
352 352 self.dirstate_parents.set(parents);
353 353 Ok((identity, None, 0))
354 354 }
355 355 }
356 356 }
357 357 }
358 358
359 359 fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
360 360 if self.use_dirstate_v2() {
361 361 // The v2 dirstate is split into a docket and a data file.
362 362 // Since we don't always take the `wlock` to read it
363 363 // (like in `hg status`), it is susceptible to races.
364 364 // A simple retry method should be enough since full rewrites
365 365 // only happen when too much garbage data is present and
366 366 // this race is unlikely.
367 367 let mut tries = 0;
368 368
369 369 while tries < V2_MAX_READ_ATTEMPTS {
370 370 tries += 1;
371 371 match self.read_docket_and_data_file() {
372 372 Ok(m) => {
373 373 return Ok(m);
374 374 }
375 375 Err(e) => match e {
376 376 DirstateError::Common(HgError::RaceDetected(
377 377 context,
378 378 )) => {
379 379 log::info!(
380 380 "dirstate read race detected {} (retry {}/{})",
381 381 context,
382 382 tries,
383 383 V2_MAX_READ_ATTEMPTS,
384 384 );
385 385 continue;
386 386 }
387 387 _ => {
388 388 log::info!(
389 389 "Reading dirstate v2 failed, \
390 390 falling back to v1"
391 391 );
392 392 return self.new_dirstate_map_v1();
393 393 }
394 394 },
395 395 }
396 396 }
397 397 let error = HgError::abort(
398 398 format!("dirstate read race happened {tries} times in a row"),
399 399 255,
400 400 None,
401 401 );
402 402 Err(DirstateError::Common(error))
403 403 } else {
404 404 self.new_dirstate_map_v1()
405 405 }
406 406 }
407 407
408 408 fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
409 409 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
410 410 let identity = self.dirstate_identity()?;
411 411 let dirstate_file_contents = self.dirstate_file_contents()?;
412 412 if dirstate_file_contents.is_empty() {
413 413 self.dirstate_parents.set(DirstateParents::NULL);
414 414 Ok(OwningDirstateMap::new_empty(Vec::new()))
415 415 } else {
416 416 let (map, parents) =
417 417 OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
418 418 self.dirstate_parents.set(parents);
419 419 Ok(map)
420 420 }
421 421 }
422 422
423 423 fn read_docket_and_data_file(
424 424 &self,
425 425 ) -> Result<OwningDirstateMap, DirstateError> {
426 426 debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
427 427 let dirstate_file_contents = self.dirstate_file_contents()?;
428 428 let identity = self.dirstate_identity()?;
429 429 if dirstate_file_contents.is_empty() {
430 430 self.dirstate_parents.set(DirstateParents::NULL);
431 431 return Ok(OwningDirstateMap::new_empty(Vec::new()));
432 432 }
433 433 let docket = crate::dirstate_tree::on_disk::read_docket(
434 434 &dirstate_file_contents,
435 435 )?;
436 436 debug_wait_for_file_or_print(
437 437 self.config(),
438 438 "dirstate.post-docket-read-file",
439 439 );
440 440 self.dirstate_parents.set(docket.parents());
441 441 let uuid = docket.uuid.to_owned();
442 442 let data_size = docket.data_size();
443 443
444 444 let context = "between reading dirstate docket and data file";
445 445 let race_error = HgError::RaceDetected(context.into());
446 446 let metadata = docket.tree_metadata();
447 447
448 448 let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
449 449 // Don't mmap on NFS to prevent `SIGBUS` error on deletion
450 450 let contents = self.hg_vfs().read(docket.data_filename());
451 451 let contents = match contents {
452 452 Ok(c) => c,
453 453 Err(HgError::IoError { error, context }) => {
454 454 match error.raw_os_error().expect("real os error") {
455 455 // 2 = ENOENT, No such file or directory
456 456 // 116 = ESTALE, Stale NFS file handle
457 457 //
458 458 // TODO match on `error.kind()` when
459 459 // `ErrorKind::StaleNetworkFileHandle` is stable.
460 460 2 | 116 => {
461 461 // Race where the data file was deleted right after
462 462 // we read the docket, try again
463 463 return Err(race_error.into());
464 464 }
465 465 _ => {
466 466 return Err(
467 467 HgError::IoError { error, context }.into()
468 468 )
469 469 }
470 470 }
471 471 }
472 472 Err(e) => return Err(e.into()),
473 473 };
474 474 OwningDirstateMap::new_v2(
475 475 contents, data_size, metadata, uuid, identity,
476 476 )
477 477 } else {
478 478 match self
479 479 .hg_vfs()
480 480 .mmap_open(docket.data_filename())
481 481 .io_not_found_as_none()
482 482 {
483 483 Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
484 484 data_mmap, data_size, metadata, uuid, identity,
485 485 ),
486 486 Ok(None) => {
487 487 // Race where the data file was deleted right after we
488 488 // read the docket, try again
489 489 return Err(race_error.into());
490 490 }
491 491 Err(e) => return Err(e.into()),
492 492 }
493 493 }?;
494 494
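// Illustrative note (not from the original source): the
// `devel.dirstate.v2.data_update_mode` config is a debugging knob.
// "force-new" always rewrites a fresh data file, "force-append" always
// appends to the existing one, and "auto" (the default, and the fallback
// for unknown values) lets the implementation decide.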
495 495 let write_mode_config = self
496 496 .config()
497 497 .get_str(b"devel", b"dirstate.v2.data_update_mode")
498 498 .unwrap_or(Some("auto"))
499 499 .unwrap_or("auto"); // don't bother for devel options
500 500 let write_mode = match write_mode_config {
501 501 "auto" => DirstateMapWriteMode::Auto,
502 502 "force-new" => DirstateMapWriteMode::ForceNewDataFile,
503 503 "force-append" => DirstateMapWriteMode::ForceAppend,
504 504 _ => DirstateMapWriteMode::Auto,
505 505 };
506 506
507 507 map.with_dmap_mut(|m| m.set_write_mode(write_mode));
508 508
509 509 Ok(map)
510 510 }
511 511
512 512 pub fn dirstate_map(
513 513 &self,
514 514 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
515 515 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
516 516 }
517 517
518 518 pub fn dirstate_map_mut(
519 519 &self,
520 520 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
521 521 self.dirstate_map
522 522 .get_mut_or_init(|| self.new_dirstate_map())
523 523 }
524 524
525 525 fn new_changelog(&self) -> Result<Changelog, HgError> {
526 526 Changelog::open(&self.store_vfs(), self.has_nodemap())
527 527 }
528 528
529 529 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
530 530 self.changelog.get_or_init(|| self.new_changelog())
531 531 }
532 532
533 533 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
534 534 self.changelog.get_mut_or_init(|| self.new_changelog())
535 535 }
536 536
537 537 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
538 538 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
539 539 }
540 540
541 541 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
542 542 self.manifestlog.get_or_init(|| self.new_manifestlog())
543 543 }
544 544
545 545 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
546 546 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
547 547 }
548 548
549 549 /// Returns the manifest of the *changeset* with the given node ID
550 550 pub fn manifest_for_node(
551 551 &self,
552 552 node: impl Into<NodePrefix>,
553 553 ) -> Result<Manifest, RevlogError> {
554 554 self.manifestlog()?.data_for_node(
555 555 self.changelog()?
556 556 .data_for_node(node.into())?
557 557 .manifest_node()?
558 558 .into(),
559 559 )
560 560 }
561 561
562 562 /// Returns the manifest of the *changeset* with the given revision number
563 563 pub fn manifest_for_rev(
564 564 &self,
565 565 revision: UncheckedRevision,
566 566 ) -> Result<Manifest, RevlogError> {
567 567 self.manifestlog()?.data_for_node(
568 568 self.changelog()?
569 569 .data_for_rev(revision)?
570 570 .manifest_node()?
571 571 .into(),
572 572 )
573 573 }
574 574
575 575 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
576 576 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
577 577 Ok(entry.tracked())
578 578 } else {
579 579 Ok(false)
580 580 }
581 581 }
582 582
583 583 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
584 584 Filelog::open(self, path)
585 585 }
586 586
587 587 /// Write to disk any updates that were made through `dirstate_map_mut`.
588 588 ///
589 589 /// The "wlock" must be held while calling this.
590 590 /// See for example `try_with_wlock_no_wait`.
591 591 ///
592 592 /// TODO: have a `WritableRepo` type only accessible while holding the
593 593 /// lock?
594 594 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
595 595 let map = self.dirstate_map()?;
596 596 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
597 597 // it’s unset
598 598 let parents = self.dirstate_parents()?;
599 599 let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() {
600 600 let (identity, uuid, data_size) =
601 601 self.get_dirstate_data_file_integrity()?;
602 602 let identity_changed = identity != map.old_identity();
603 603 let uuid_changed = uuid.as_deref() != map.old_uuid();
604 604 let data_length_changed = data_size != map.old_data_size();
605 605
606 606 if identity_changed || uuid_changed || data_length_changed {
607 607 // If any of identity, uuid or length have changed since
608 608 // last disk read, don't write.
609 609 // This is fine because either we're in a command that doesn't
610 610 // write anything too important (like `hg status`), or we're in
611 611 // `hg add` and we're supposed to have taken the lock before
612 612 // reading anyway.
613 613 //
614 614 // TODO complain loudly if we've changed anything important
615 615 // without taking the lock.
616 616 // (see `hg help config.format.use-dirstate-tracked-hint`)
617 617 log::debug!(
618 618 "dirstate has changed since last read, not updating."
619 619 );
620 620 return Ok(());
621 621 }
622 622
623 623 let uuid_opt = map.old_uuid();
624 624 let write_mode = if uuid_opt.is_some() {
625 625 DirstateMapWriteMode::Auto
626 626 } else {
627 627 DirstateMapWriteMode::ForceNewDataFile
628 628 };
629 629 let (data, tree_metadata, append, old_data_size) =
630 630 map.pack_v2(write_mode)?;
631 631
632 632 // Reuse the uuid, or generate a new one, keeping the old for
633 633 // deletion.
634 634 let (uuid, old_uuid) = match uuid_opt {
635 635 Some(uuid) => {
636 636 let as_str = std::str::from_utf8(uuid)
637 637 .map_err(|_| {
638 638 HgError::corrupted(
639 639 "non-UTF-8 dirstate data file ID",
640 640 )
641 641 })?
642 642 .to_owned();
643 643 if append {
644 644 (as_str, None)
645 645 } else {
646 646 (DirstateDocket::new_uid(), Some(as_str))
647 647 }
648 648 }
649 649 None => (DirstateDocket::new_uid(), None),
650 650 };
651 651
652 652 let data_filename = format!("dirstate.{}", uuid);
653 653 let data_filename = self.hg_vfs().join(data_filename);
654 654 let mut options = std::fs::OpenOptions::new();
655 655 options.write(true);
656 656
657 657 // Why are we not using the O_APPEND flag when appending?
658 658 //
659 659 // - O_APPEND makes it trickier to deal with garbage at the end of
660 660 // the file, left by a previous uncommitted transaction. By
661 661 // starting the write at [old_data_size] we make sure we erase
662 662 // all such garbage.
663 663 //
664 664 // - O_APPEND requires special-casing 0-byte writes, whereas we
665 665 // don't need that.
666 666 //
667 667 // - Some OSes have bugs in their implementation of O_APPEND:
668 668 // revlog.py talks about a Solaris bug, but we also saw some ZFS
669 669 // bug: https://github.com/openzfs/zfs/pull/3124,
670 670 // https://github.com/openzfs/zfs/issues/13370
671 671 //
672 672 if !append {
673 673 log::trace!("creating a new dirstate data file");
674 674 options.create_new(true);
675 675 } else {
676 676 log::trace!("appending to the dirstate data file");
677 677 }
678 678
679 679 let data_size = (|| {
680 680 // TODO: loop and try another random ID if !append and this
681 681 // returns `ErrorKind::AlreadyExists`? Collision chance of two
682 682 // random IDs is one in 2**32
683 683 let mut file = options.open(&data_filename)?;
684 684 if append {
685 685 file.seek(SeekFrom::Start(old_data_size as u64))?;
686 686 }
687 687 file.write_all(&data)?;
688 688 file.flush()?;
689 file.seek(SeekFrom::Current(0))
689 file.stream_position()
690 690 })()
691 691 .when_writing_file(&data_filename)?;
692 692
693 693 let packed_dirstate = DirstateDocket::serialize(
694 694 parents,
695 695 tree_metadata,
696 696 data_size,
697 697 uuid.as_bytes(),
698 698 )
699 699 .map_err(|_: std::num::TryFromIntError| {
700 700 HgError::corrupted("overflow in dirstate docket serialization")
701 701 })?;
702 702
703 703 (packed_dirstate, old_uuid)
704 704 } else {
705 705 let identity = self.dirstate_identity()?;
706 706 if identity != map.old_identity() {
707 707 // If identity changed since last disk read, don't write.
708 708 // This is fine because either we're in a command that doesn't
709 709 // write anything too important (like `hg status`), or we're in
710 710 // `hg add` and we're supposed to have taken the lock before
711 711 // reading anyway.
712 712 //
713 713 // TODO complain loudly if we've changed anything important
714 714 // without taking the lock.
715 715 // (see `hg help config.format.use-dirstate-tracked-hint`)
716 716 log::debug!(
717 717 "dirstate has changed since last read, not updating."
718 718 );
719 719 return Ok(());
720 720 }
721 721 (map.pack_v1(parents)?, None)
722 722 };
723 723
724 724 let vfs = self.hg_vfs();
725 725 vfs.atomic_write("dirstate", &packed_dirstate)?;
726 726 if let Some(uuid) = old_uuid_to_remove {
727 727 // Remove the old data file after the new docket pointing to the
728 728 // new data file was written.
729 729 vfs.remove_file(format!("dirstate.{}", uuid))?;
730 730 }
731 731 Ok(())
732 732 }
733 733 }
734 734
735 735 /// Lazily-initialized component of `Repo` with interior mutability
736 736 ///
737 737 /// This differs from `OnceCell` in that the value can still be "deinitialized"
738 738 /// later by setting its inner `Option` to `None`. It also takes the
739 739 /// initialization function as an argument when the value is requested, not
740 740 /// when the instance is created.
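///
/// An illustrative usage sketch (not from the original source):
///
/// ```ignore
/// let cell: LazyCell<u32> = LazyCell::new();
/// {
///     let value = cell.get_or_init(|| Ok::<_, ()>(42)).unwrap();
///     assert_eq!(*value, 42); // initialized on first access
/// } // the `Ref` guard must be dropped before mutating again
/// cell.set(7); // the value can still be replaced later
/// ```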
741 741 struct LazyCell<T> {
742 742 value: RefCell<Option<T>>,
743 743 }
744 744
745 745 impl<T> LazyCell<T> {
746 746 fn new() -> Self {
747 747 Self {
748 748 value: RefCell::new(None),
749 749 }
750 750 }
751 751
752 752 fn set(&self, value: T) {
753 753 *self.value.borrow_mut() = Some(value)
754 754 }
755 755
756 756 fn get_or_init<E>(
757 757 &self,
758 758 init: impl Fn() -> Result<T, E>,
759 759 ) -> Result<Ref<T>, E> {
760 760 let mut borrowed = self.value.borrow();
761 761 if borrowed.is_none() {
762 762 drop(borrowed);
763 763 // Only use `borrow_mut` if it is really needed to avoid panic in
764 764 // case there is another outstanding borrow but mutation is not
765 765 // needed.
766 766 *self.value.borrow_mut() = Some(init()?);
767 767 borrowed = self.value.borrow()
768 768 }
769 769 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
770 770 }
771 771
772 772 fn get_mut_or_init<E>(
773 773 &self,
774 774 init: impl Fn() -> Result<T, E>,
775 775 ) -> Result<RefMut<T>, E> {
776 776 let mut borrowed = self.value.borrow_mut();
777 777 if borrowed.is_none() {
778 778 *borrowed = Some(init()?);
779 779 }
780 780 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
781 781 }
782 782 }
@@ -1,965 +1,965 b''
1 1 // Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 mod nodemap_docket;
11 11 pub mod path_encode;
12 12 pub use node::{FromHexError, Node, NodePrefix};
13 13 pub mod changelog;
14 14 pub mod filelog;
15 15 pub mod index;
16 16 pub mod manifest;
17 17 pub mod patch;
18 18
19 19 use std::borrow::Cow;
20 20 use std::io::Read;
21 21 use std::ops::Deref;
22 22 use std::path::Path;
23 23
24 24 use flate2::read::ZlibDecoder;
25 25 use sha1::{Digest, Sha1};
26 26 use std::cell::RefCell;
27 27 use zstd;
28 28
29 29 use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
30 30 use self::nodemap_docket::NodeMapDocket;
31 31 use super::index::Index;
32 32 use super::nodemap::{NodeMap, NodeMapError};
33 33 use crate::errors::HgError;
34 34 use crate::vfs::Vfs;
35 35
36 36 /// As noted in revlog.c, revision numbers are actually encoded in
37 37 /// 4 bytes, and are liberally converted to ints, whence the i32
38 38 pub type BaseRevision = i32;
39 39
40 40 /// Mercurial revision numbers
41 41 /// In contrast to the more general [`UncheckedRevision`], these are "checked"
42 42 /// in the sense that they should only be used for revisions that are
43 43 /// valid for a given index (i.e. in bounds).
44 44 #[derive(
45 45 Debug,
46 46 derive_more::Display,
47 47 Clone,
48 48 Copy,
49 49 Hash,
50 50 PartialEq,
51 51 Eq,
52 52 PartialOrd,
53 53 Ord,
54 54 )]
55 55 pub struct Revision(pub BaseRevision);
56 56
57 57 impl format_bytes::DisplayBytes for Revision {
58 58 fn display_bytes(
59 59 &self,
60 60 output: &mut dyn std::io::Write,
61 61 ) -> std::io::Result<()> {
62 62 self.0.display_bytes(output)
63 63 }
64 64 }
65 65
66 66 /// Unchecked Mercurial revision numbers.
67 67 ///
68 68 /// Values of this type have no guarantee of being a valid revision number
69 69 /// in any context. Use method `check_revision` to get a valid revision within
70 70 /// the appropriate index object.
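///
/// An illustrative sketch (not from the original source), assuming some
/// `index` implementing [`RevlogIndex`]:
///
/// ```ignore
/// let unchecked = UncheckedRevision(5);
/// if let Some(rev) = index.check_revision(unchecked) {
///     // `rev` is a `Revision` known to be in bounds for `index`
/// }
/// ```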
71 71 #[derive(
72 72 Debug,
73 73 derive_more::Display,
74 74 Clone,
75 75 Copy,
76 76 Hash,
77 77 PartialEq,
78 78 Eq,
79 79 PartialOrd,
80 80 Ord,
81 81 )]
82 82 pub struct UncheckedRevision(pub BaseRevision);
83 83
84 84 impl format_bytes::DisplayBytes for UncheckedRevision {
85 85 fn display_bytes(
86 86 &self,
87 87 output: &mut dyn std::io::Write,
88 88 ) -> std::io::Result<()> {
89 89 self.0.display_bytes(output)
90 90 }
91 91 }
92 92
93 93 impl From<Revision> for UncheckedRevision {
94 94 fn from(value: Revision) -> Self {
95 95 Self(value.0)
96 96 }
97 97 }
98 98
99 99 impl From<BaseRevision> for UncheckedRevision {
100 100 fn from(value: BaseRevision) -> Self {
101 101 Self(value)
102 102 }
103 103 }
104 104
105 105 /// Marker expressing the absence of a parent
106 106 ///
107 107 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
108 108 /// to be smaller than all existing revisions.
109 109 pub const NULL_REVISION: Revision = Revision(-1);
110 110
111 111 /// Same as `mercurial.node.wdirrev`
112 112 ///
113 113 /// This is also equal to `i32::max_value()`, but it's better to spell
114 114 /// it out explicitly, same as in `mercurial.node`
115 115 #[allow(clippy::unreadable_literal)]
116 116 pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
117 117 UncheckedRevision(0x7fffffff);
118 118
119 119 pub const WORKING_DIRECTORY_HEX: &str =
120 120 "ffffffffffffffffffffffffffffffffffffffff";
121 121
122 122 /// The simplest expression of what we need of Mercurial DAGs.
123 123 pub trait Graph {
124 124 /// Return the two parents of the given `Revision`.
125 125 ///
126 126 /// Each of the parents can be independently `NULL_REVISION`
127 127 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
128 128 }
129 129
130 130 #[derive(Clone, Debug, PartialEq)]
131 131 pub enum GraphError {
132 132 ParentOutOfRange(Revision),
133 133 }
134 134
135 135 /// The Mercurial Revlog Index
136 136 ///
137 137 /// This is currently limited to the minimal interface that is needed for
138 138 /// the [`nodemap`](nodemap/index.html) module
139 139 pub trait RevlogIndex {
140 140 /// Total number of Revisions referenced in this index
141 141 fn len(&self) -> usize;
142 142
143 143 fn is_empty(&self) -> bool {
144 144 self.len() == 0
145 145 }
146 146
147 147 /// Return a reference to the Node or `None` for `NULL_REVISION`
148 148 fn node(&self, rev: Revision) -> Option<&Node>;
149 149
150 150 /// Return a [`Revision`] if `rev` is a valid revision number for this
151 151 /// index
152 152 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
153 153 let rev = rev.0;
154 154
155 155 if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
156 156 {
157 157 Some(Revision(rev))
158 158 } else {
159 159 None
160 160 }
161 161 }
162 162 }
163 163
164 164 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
165 165 const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
166 166 const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
167 167 const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
168 168
169 169 // Keep this in sync with REVIDX_KNOWN_FLAGS in
170 170 // mercurial/revlogutils/flagutil.py
171 171 const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
172 172 | REVISION_FLAG_ELLIPSIS
173 173 | REVISION_FLAG_EXTSTORED
174 174 | REVISION_FLAG_HASCOPIESINFO;
175 175
176 176 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
177 177
178 178 #[derive(Debug, derive_more::From, derive_more::Display)]
179 179 pub enum RevlogError {
180 180 InvalidRevision,
181 181 /// Working directory is not supported
182 182 WDirUnsupported,
183 183 /// Found more than one entry whose ID matches the requested prefix
184 184 AmbiguousPrefix,
185 185 #[from]
186 186 Other(HgError),
187 187 }
188 188
189 189 impl From<NodeMapError> for RevlogError {
190 190 fn from(error: NodeMapError) -> Self {
191 191 match error {
192 192 NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
193 193 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
194 194 format!("nodemap point to revision {} not in index", rev),
195 195 ),
196 196 }
197 197 }
198 198 }
199 199
200 200 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
201 201 HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
202 202 }
203 203
204 204 impl RevlogError {
205 205 fn corrupted<S: AsRef<str>>(context: S) -> Self {
206 206 RevlogError::Other(corrupted(context))
207 207 }
208 208 }
209 209
210 210 /// Read only implementation of revlog.
211 211 pub struct Revlog {
212 212 /// When index and data are not interleaved: bytes of the revlog index.
213 213 /// When index and data are interleaved: bytes of the revlog index and
214 214 /// data.
215 215 index: Index,
216 216 /// When index and data are not interleaved: bytes of the revlog data
217 217 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
218 218 /// When present on disk: the persistent nodemap for this revlog
219 219 nodemap: Option<nodemap::NodeTree>,
220 220 }
221 221
222 222 impl Graph for Revlog {
223 223 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
224 224 self.index.parents(rev)
225 225 }
226 226 }
227 227
228 228 impl Revlog {
229 229 /// Open a revlog index file.
230 230 ///
231 231 /// It will also open the associated data file if index and data are not
232 232 /// interleaved.
233 233 pub fn open(
234 234 store_vfs: &Vfs,
235 235 index_path: impl AsRef<Path>,
236 236 data_path: Option<&Path>,
237 237 use_nodemap: bool,
238 238 ) -> Result<Self, HgError> {
239 239 Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
240 240 }
241 241
242 242 fn open_gen(
243 243 store_vfs: &Vfs,
244 244 index_path: impl AsRef<Path>,
245 245 data_path: Option<&Path>,
246 246 use_nodemap: bool,
247 247 nodemap_for_test: Option<nodemap::NodeTree>,
248 248 ) -> Result<Self, HgError> {
249 249 let index_path = index_path.as_ref();
250 250 let index = {
251 match store_vfs.mmap_open_opt(&index_path)? {
252 None => Index::new(Box::new(vec![])),
251 match store_vfs.mmap_open_opt(index_path)? {
252 None => Index::new(Box::<Vec<_>>::default()),
253 253 Some(index_mmap) => {
254 254 let index = Index::new(Box::new(index_mmap))?;
255 255 Ok(index)
256 256 }
257 257 }
258 258 }?;
259 259
260 260 let default_data_path = index_path.with_extension("d");
261 261
262 262 // Type annotation required: on its own, the compiler
263 263 // won't recognize Mmap as Deref<Target = [u8]>
264 264 let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
265 265 if index.is_inline() {
266 266 None
267 267 } else {
268 268 let data_path = data_path.unwrap_or(&default_data_path);
269 269 let data_mmap = store_vfs.mmap_open(data_path)?;
270 270 Some(Box::new(data_mmap))
271 271 };
272 272
273 273 let nodemap = if index.is_inline() || !use_nodemap {
274 274 None
275 275 } else {
276 276 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
277 277 |(docket, data)| {
278 278 nodemap::NodeTree::load_bytes(
279 279 Box::new(data),
280 280 docket.data_length,
281 281 )
282 282 },
283 283 )
284 284 };
285 285
286 286 let nodemap = nodemap_for_test.or(nodemap);
287 287
288 288 Ok(Revlog {
289 289 index,
290 290 data_bytes,
291 291 nodemap,
292 292 })
293 293 }
294 294
295 295 /// Return number of entries of the `Revlog`.
296 296 pub fn len(&self) -> usize {
297 297 self.index.len()
298 298 }
299 299
300 300 /// Returns `true` if the `Revlog` has zero entries.
301 301 pub fn is_empty(&self) -> bool {
302 302 self.index.is_empty()
303 303 }
304 304
305 305 /// Returns the node ID for the given revision number, if it exists in this
306 306 /// revlog
307 307 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
308 308 if rev == NULL_REVISION.into() {
309 309 return Some(&NULL_NODE);
310 310 }
311 311 let rev = self.index.check_revision(rev)?;
312 312 Some(self.index.get_entry(rev)?.hash())
313 313 }
314 314
315 315 /// Return the revision number for the given node ID, if it exists in this
316 316 /// revlog
317 317 pub fn rev_from_node(
318 318 &self,
319 319 node: NodePrefix,
320 320 ) -> Result<Revision, RevlogError> {
321 321 if let Some(nodemap) = &self.nodemap {
322 322 nodemap
323 323 .find_bin(&self.index, node)?
324 324 .ok_or(RevlogError::InvalidRevision)
325 325 } else {
326 326 self.rev_from_node_no_persistent_nodemap(node)
327 327 }
328 328 }
329 329
330 330 /// Same as `rev_from_node`, without using a persistent nodemap
331 331 ///
332 332 /// This is used as fallback when a persistent nodemap is not present.
333 333 /// This happens when the persistent-nodemap experimental feature is not
334 334 /// enabled, or for small revlogs.
335 335 fn rev_from_node_no_persistent_nodemap(
336 336 &self,
337 337 node: NodePrefix,
338 338 ) -> Result<Revision, RevlogError> {
339 339 // Linear scan of the revlog
340 340 // TODO: consider building a non-persistent nodemap in memory to
341 341 // optimize these cases.
342 342 let mut found_by_prefix = None;
343 343 for rev in (-1..self.len() as BaseRevision).rev() {
344 344 let rev = Revision(rev as BaseRevision);
345 345 let candidate_node = if rev == Revision(-1) {
346 346 NULL_NODE
347 347 } else {
348 348 let index_entry =
349 349 self.index.get_entry(rev).ok_or_else(|| {
350 350 HgError::corrupted(
351 351 "revlog references a revision not in the index",
352 352 )
353 353 })?;
354 354 *index_entry.hash()
355 355 };
356 356 if node == candidate_node {
357 357 return Ok(rev);
358 358 }
359 359 if node.is_prefix_of(&candidate_node) {
360 360 if found_by_prefix.is_some() {
361 361 return Err(RevlogError::AmbiguousPrefix);
362 362 }
363 363 found_by_prefix = Some(rev)
364 364 }
365 365 }
366 366 found_by_prefix.ok_or(RevlogError::InvalidRevision)
367 367 }
368 368
369 369 /// Returns whether the given revision exists in this revlog.
370 370 pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
371 371 self.index.check_revision(rev).is_some()
372 372 }
373 373
374 374 /// Return the full data associated to a revision.
375 375 ///
376 376 /// All entries required to build the final data out of deltas will be
377 377 /// retrieved as needed, and the deltas will be applied to the initial
378 378 /// snapshot to rebuild the final data.
379 379 pub fn get_rev_data(
380 380 &self,
381 381 rev: UncheckedRevision,
382 382 ) -> Result<Cow<[u8]>, RevlogError> {
383 383 if rev == NULL_REVISION.into() {
384 384 return Ok(Cow::Borrowed(&[]));
385 385 };
386 386 self.get_entry(rev)?.data()
387 387 }
388 388
389 389 /// [`Self::get_rev_data`] for checked revisions.
390 390 pub fn get_rev_data_for_checked_rev(
391 391 &self,
392 392 rev: Revision,
393 393 ) -> Result<Cow<[u8]>, RevlogError> {
394 394 if rev == NULL_REVISION {
395 395 return Ok(Cow::Borrowed(&[]));
396 396 };
397 397 self.get_entry_for_checked_rev(rev)?.data()
398 398 }
399 399
400 400 /// Check the hash of some given data against the recorded hash.
401 401 pub fn check_hash(
402 402 &self,
403 403 p1: Revision,
404 404 p2: Revision,
405 405 expected: &[u8],
406 406 data: &[u8],
407 407 ) -> bool {
408 408 let e1 = self.index.get_entry(p1);
409 409 let h1 = match e1 {
410 410 Some(ref entry) => entry.hash(),
411 411 None => &NULL_NODE,
412 412 };
413 413 let e2 = self.index.get_entry(p2);
414 414 let h2 = match e2 {
415 415 Some(ref entry) => entry.hash(),
416 416 None => &NULL_NODE,
417 417 };
418 418
419 419 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
420 420 }
421 421
422 422 /// Build the full data of a revision out of its snapshot
423 423 /// and its deltas.
424 424 fn build_data_from_deltas(
425 425 snapshot: RevlogEntry,
426 426 deltas: &[RevlogEntry],
427 427 ) -> Result<Vec<u8>, HgError> {
428 428 let snapshot = snapshot.data_chunk()?;
429 429 let deltas = deltas
430 430 .iter()
431 431 .rev()
432 432 .map(RevlogEntry::data_chunk)
433 433 .collect::<Result<Vec<_>, _>>()?;
434 434 let patches: Vec<_> =
435 435 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
436 436 let patch = patch::fold_patch_lists(&patches);
437 437 Ok(patch.apply(&snapshot))
438 438 }
439 439
440 440 /// Return the revlog data.
441 441 fn data(&self) -> &[u8] {
442 442 match &self.data_bytes {
443 443 Some(data_bytes) => data_bytes,
444 444 None => panic!(
445 445 "forgot to load the data or trying to access inline data"
446 446 ),
447 447 }
448 448 }
449 449
450 450 pub fn make_null_entry(&self) -> RevlogEntry {
451 451 RevlogEntry {
452 452 revlog: self,
453 453 rev: NULL_REVISION,
454 454 bytes: b"",
455 455 compressed_len: 0,
456 456 uncompressed_len: 0,
457 457 base_rev_or_base_of_delta_chain: None,
458 458 p1: NULL_REVISION,
459 459 p2: NULL_REVISION,
460 460 flags: NULL_REVLOG_ENTRY_FLAGS,
461 461 hash: NULL_NODE,
462 462 }
463 463 }
464 464
465 465 fn get_entry_for_checked_rev(
466 466 &self,
467 467 rev: Revision,
468 468 ) -> Result<RevlogEntry, RevlogError> {
469 469 if rev == NULL_REVISION {
470 470 return Ok(self.make_null_entry());
471 471 }
472 472 let index_entry = self
473 473 .index
474 474 .get_entry(rev)
475 475 .ok_or(RevlogError::InvalidRevision)?;
476 476 let start = index_entry.offset();
477 477 let end = start + index_entry.compressed_len() as usize;
478 478 let data = if self.index.is_inline() {
479 479 self.index.data(start, end)
480 480 } else {
481 481 &self.data()[start..end]
482 482 };
483 483 let base_rev = self
484 484 .index
485 485 .check_revision(index_entry.base_revision_or_base_of_delta_chain())
486 486 .ok_or_else(|| {
487 487 RevlogError::corrupted(format!(
488 488 "base revision for rev {} is invalid",
489 489 rev
490 490 ))
491 491 })?;
492 492 let p1 =
493 493 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
494 494 RevlogError::corrupted(format!(
495 495 "p1 for rev {} is invalid",
496 496 rev
497 497 ))
498 498 })?;
499 499 let p2 =
500 500 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
501 501 RevlogError::corrupted(format!(
502 502 "p2 for rev {} is invalid",
503 503 rev
504 504 ))
505 505 })?;
506 506 let entry = RevlogEntry {
507 507 revlog: self,
508 508 rev,
509 509 bytes: data,
510 510 compressed_len: index_entry.compressed_len(),
511 511 uncompressed_len: index_entry.uncompressed_len(),
512 512 base_rev_or_base_of_delta_chain: if base_rev == rev {
513 513 None
514 514 } else {
515 515 Some(base_rev)
516 516 },
517 517 p1,
518 518 p2,
519 519 flags: index_entry.flags(),
520 520 hash: *index_entry.hash(),
521 521 };
522 522 Ok(entry)
523 523 }
524 524
525 525 /// Get an entry of the revlog.
526 526 pub fn get_entry(
527 527 &self,
528 528 rev: UncheckedRevision,
529 529 ) -> Result<RevlogEntry, RevlogError> {
530 530 if rev == NULL_REVISION.into() {
531 531 return Ok(self.make_null_entry());
532 532 }
533 533 let rev = self.index.check_revision(rev).ok_or_else(|| {
534 534 RevlogError::corrupted(format!("rev {} is invalid", rev))
535 535 })?;
536 536 self.get_entry_for_checked_rev(rev)
537 537 }
538 538 }
539 539
540 540 /// The revlog entry's bytes and the necessary information to extract
541 541 /// the entry's data.
542 542 #[derive(Clone)]
543 543 pub struct RevlogEntry<'revlog> {
544 544 revlog: &'revlog Revlog,
545 545 rev: Revision,
546 546 bytes: &'revlog [u8],
547 547 compressed_len: u32,
548 548 uncompressed_len: i32,
549 549 base_rev_or_base_of_delta_chain: Option<Revision>,
550 550 p1: Revision,
551 551 p2: Revision,
552 552 flags: u16,
553 553 hash: Node,
554 554 }
555 555
556 556 thread_local! {
557 557 // seems fine to [unwrap] here: this can only fail due to memory allocation
558 558 // failing, and it's normal for that to cause panic.
559 559 static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
560 560 RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
561 561 }
562 562
563 563 fn zstd_decompress_to_buffer(
564 564 bytes: &[u8],
565 565 buf: &mut Vec<u8>,
566 566 ) -> Result<usize, std::io::Error> {
567 567 ZSTD_DECODER
568 568 .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
569 569 }
570 570
571 571 impl<'revlog> RevlogEntry<'revlog> {
572 572 pub fn revision(&self) -> Revision {
573 573 self.rev
574 574 }
575 575
576 576 pub fn node(&self) -> &Node {
577 577 &self.hash
578 578 }
579 579
580 580 pub fn uncompressed_len(&self) -> Option<u32> {
581 581 u32::try_from(self.uncompressed_len).ok()
582 582 }
583 583
584 584 pub fn has_p1(&self) -> bool {
585 585 self.p1 != NULL_REVISION
586 586 }
587 587
588 588 pub fn p1_entry(
589 589 &self,
590 590 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
591 591 if self.p1 == NULL_REVISION {
592 592 Ok(None)
593 593 } else {
594 594 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
595 595 }
596 596 }
597 597
598 598 pub fn p2_entry(
599 599 &self,
600 600 ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
601 601 if self.p2 == NULL_REVISION {
602 602 Ok(None)
603 603 } else {
604 604 Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
605 605 }
606 606 }
607 607
608 608 pub fn p1(&self) -> Option<Revision> {
609 609 if self.p1 == NULL_REVISION {
610 610 None
611 611 } else {
612 612 Some(self.p1)
613 613 }
614 614 }
615 615
616 616 pub fn p2(&self) -> Option<Revision> {
617 617 if self.p2 == NULL_REVISION {
618 618 None
619 619 } else {
620 620 Some(self.p2)
621 621 }
622 622 }
623 623
624 624 pub fn is_censored(&self) -> bool {
625 625 (self.flags & REVISION_FLAG_CENSORED) != 0
626 626 }
627 627
628 628 pub fn has_length_affecting_flag_processor(&self) -> bool {
629 629 // Relevant Python code: revlog.size()
630 630 // note: ELLIPSIS is known to not change the content
631 631 (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
632 632 }
633 633
634 634 /// The data for this entry, after resolving deltas if any.
635 635 pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
636 636 let mut entry = self.clone();
637 637 let mut delta_chain = vec![];
638 638
639 639 // The meaning of `base_rev_or_base_of_delta_chain` depends on
640 640 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
641 641 // `mercurial/revlogutils/constants.py` and the code in
642 642 // [_chaininfo] and in [index_deltachain].
643 643 let uses_generaldelta = self.revlog.index.uses_generaldelta();
644 644 while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
645 645 entry = if uses_generaldelta {
646 646 delta_chain.push(entry);
647 647 self.revlog.get_entry_for_checked_rev(base_rev)?
648 648 } else {
649 649 let base_rev = UncheckedRevision(entry.rev.0 - 1);
650 650 delta_chain.push(entry);
651 651 self.revlog.get_entry(base_rev)?
652 652 };
653 653 }
654 654
655 655 let data = if delta_chain.is_empty() {
656 656 entry.data_chunk()?
657 657 } else {
658 658 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
659 659 };
660 660
661 661 Ok(data)
662 662 }
663 663
664 664 fn check_data(
665 665 &self,
666 666 data: Cow<'revlog, [u8]>,
667 667 ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
668 668 if self.revlog.check_hash(
669 669 self.p1,
670 670 self.p2,
671 671 self.hash.as_bytes(),
672 672 &data,
673 673 ) {
674 674 Ok(data)
675 675 } else {
676 676 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
677 677 return Err(HgError::unsupported(
678 678 "ellipsis revisions are not supported by rhg",
679 679 )
680 680 .into());
681 681 }
682 682 Err(corrupted(format!(
683 683 "hash check failed for revision {}",
684 684 self.rev
685 685 ))
686 686 .into())
687 687 }
688 688 }
689 689
690 690 pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
691 691 let data = self.rawdata()?;
692 692 if self.rev == NULL_REVISION {
693 693 return Ok(data);
694 694 }
695 695 if self.is_censored() {
696 696 return Err(HgError::CensoredNodeError.into());
697 697 }
698 698 self.check_data(data)
699 699 }
700 700
701 701 /// Extract the data contained in the entry.
702 702 /// This may be a delta. (See `is_delta`.)
703 703 fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
704 704 if self.bytes.is_empty() {
705 705 return Ok(Cow::Borrowed(&[]));
706 706 }
707 707 match self.bytes[0] {
708 708 // Revision data is the entirety of the entry, including this
709 709 // header.
710 710 b'\0' => Ok(Cow::Borrowed(self.bytes)),
711 711 // Raw revision data follows.
712 712 b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
713 713 // zlib (RFC 1950) data.
714 714 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
715 715 // zstd data.
716 716 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
717 717 // A proper new format should have had a repo/store requirement.
718 718 format_type => Err(corrupted(format!(
719 719 "unknown compression header '{}'",
720 720 format_type
721 721 ))),
722 722 }
723 723 }
724 724
725 725 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
726 726 let mut decoder = ZlibDecoder::new(self.bytes);
727 727 if self.is_delta() {
728 728 let mut buf = Vec::with_capacity(self.compressed_len as usize);
729 729 decoder
730 730 .read_to_end(&mut buf)
731 731 .map_err(|e| corrupted(e.to_string()))?;
732 732 Ok(buf)
733 733 } else {
734 734 let cap = self.uncompressed_len.max(0) as usize;
735 735 let mut buf = vec![0; cap];
736 736 decoder
737 737 .read_exact(&mut buf)
738 738 .map_err(|e| corrupted(e.to_string()))?;
739 739 Ok(buf)
740 740 }
741 741 }
742 742
743 743 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
744 744 let cap = self.uncompressed_len.max(0) as usize;
745 745 if self.is_delta() {
746 746 // [cap] is usually an over-estimate of the space needed because
747 747 // it's the length of delta-decoded data, but we're interested
748 748 // in the size of the delta.
749 749 // This means we have to [shrink_to_fit] to avoid holding on
750 750 // to a large chunk of memory, but it also means we must have a
751 751 // fallback branch, for the case when the delta is longer than
752 752 // the original data (surprisingly, this does happen in practice)
753 753 let mut buf = Vec::with_capacity(cap);
754 754 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
755 755 Ok(_) => buf.shrink_to_fit(),
756 756 Err(_) => {
757 757 buf.clear();
758 758 zstd::stream::copy_decode(self.bytes, &mut buf)
759 759 .map_err(|e| corrupted(e.to_string()))?;
760 760 }
761 761 };
762 762 Ok(buf)
763 763 } else {
764 764 let mut buf = Vec::with_capacity(cap);
765 765 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
766 766 .map_err(|e| corrupted(e.to_string()))?;
767 767 if len != self.uncompressed_len as usize {
768 768 Err(corrupted("uncompressed length does not match"))
769 769 } else {
770 770 Ok(buf)
771 771 }
772 772 }
773 773 }
774 774
775 775 /// Tell if the entry is a snapshot or a delta
777 777 /// (this influences decompression).
777 777 fn is_delta(&self) -> bool {
778 778 self.base_rev_or_base_of_delta_chain.is_some()
779 779 }
780 780 }
781 781
782 782 /// Calculate the hash of a revision given its data and its parents.
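/// (Note added for clarity, derived from the code below: the two parent
/// hashes are fed to SHA-1 in sorted byte order, so the result does not
/// depend on which parent is p1 and which is p2.)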
783 783 fn hash(
784 784 data: &[u8],
785 785 p1_hash: &[u8],
786 786 p2_hash: &[u8],
787 787 ) -> [u8; NODE_BYTES_LENGTH] {
788 788 let mut hasher = Sha1::new();
789 789 let (a, b) = (p1_hash, p2_hash);
790 790 if a > b {
791 791 hasher.update(b);
792 792 hasher.update(a);
793 793 } else {
794 794 hasher.update(a);
795 795 hasher.update(b);
796 796 }
797 797 hasher.update(data);
798 798 *hasher.finalize().as_ref()
799 799 }
800 800
801 801 #[cfg(test)]
802 802 mod tests {
803 803 use super::*;
804 804 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
805 805 use itertools::Itertools;
806 806
807 807 #[test]
808 808 fn test_empty() {
809 809 let temp = tempfile::tempdir().unwrap();
810 810 let vfs = Vfs { base: temp.path() };
811 811 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
812 812 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
813 813 assert!(revlog.is_empty());
814 814 assert_eq!(revlog.len(), 0);
815 815 assert!(revlog.get_entry(0.into()).is_err());
816 816 assert!(!revlog.has_rev(0.into()));
817 817 assert_eq!(
818 818 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
819 819 NULL_REVISION
820 820 );
821 821 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
822 822 assert_eq!(null_entry.revision(), NULL_REVISION);
823 823 assert!(null_entry.data().unwrap().is_empty());
824 824 }
825 825
826 826 #[test]
827 827 fn test_inline() {
828 828 let temp = tempfile::tempdir().unwrap();
829 829 let vfs = Vfs { base: temp.path() };
830 830 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
831 831 .unwrap();
832 832 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
833 833 .unwrap();
834 834 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
835 835 .unwrap();
836 836 let entry0_bytes = IndexEntryBuilder::new()
837 837 .is_first(true)
838 838 .with_version(1)
839 839 .with_inline(true)
840 840 .with_offset(INDEX_ENTRY_SIZE)
841 841 .with_node(node0)
842 842 .build();
843 843 let entry1_bytes = IndexEntryBuilder::new()
844 844 .with_offset(INDEX_ENTRY_SIZE)
845 845 .with_node(node1)
846 846 .build();
847 847 let entry2_bytes = IndexEntryBuilder::new()
848 848 .with_offset(INDEX_ENTRY_SIZE)
849 849 .with_p1(Revision(0))
850 850 .with_p2(Revision(1))
851 851 .with_node(node2)
852 852 .build();
853 853 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
854 854 .into_iter()
855 855 .flatten()
856 856 .collect_vec();
857 857 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
858 858 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
859 859
860 860 let entry0 = revlog.get_entry(0.into()).ok().unwrap();
861 861 assert_eq!(entry0.revision(), Revision(0));
862 862 assert_eq!(*entry0.node(), node0);
863 863 assert!(!entry0.has_p1());
864 864 assert_eq!(entry0.p1(), None);
865 865 assert_eq!(entry0.p2(), None);
866 866 let p1_entry = entry0.p1_entry().unwrap();
867 867 assert!(p1_entry.is_none());
868 868 let p2_entry = entry0.p2_entry().unwrap();
869 869 assert!(p2_entry.is_none());
870 870
871 871 let entry1 = revlog.get_entry(1.into()).ok().unwrap();
872 872 assert_eq!(entry1.revision(), Revision(1));
873 873 assert_eq!(*entry1.node(), node1);
874 874 assert!(!entry1.has_p1());
875 875 assert_eq!(entry1.p1(), None);
876 876 assert_eq!(entry1.p2(), None);
877 877 let p1_entry = entry1.p1_entry().unwrap();
878 878 assert!(p1_entry.is_none());
879 879 let p2_entry = entry1.p2_entry().unwrap();
880 880 assert!(p2_entry.is_none());
881 881
882 882 let entry2 = revlog.get_entry(2.into()).ok().unwrap();
883 883 assert_eq!(entry2.revision(), Revision(2));
884 884 assert_eq!(*entry2.node(), node2);
885 885 assert!(entry2.has_p1());
886 886 assert_eq!(entry2.p1(), Some(Revision(0)));
887 887 assert_eq!(entry2.p2(), Some(Revision(1)));
888 888 let p1_entry = entry2.p1_entry().unwrap();
889 889 assert!(p1_entry.is_some());
890 890 assert_eq!(p1_entry.unwrap().revision(), Revision(0));
891 891 let p2_entry = entry2.p2_entry().unwrap();
892 892 assert!(p2_entry.is_some());
893 893 assert_eq!(p2_entry.unwrap().revision(), Revision(1));
894 894 }
895 895
896 896 #[test]
897 897 fn test_nodemap() {
898 898 let temp = tempfile::tempdir().unwrap();
899 899 let vfs = Vfs { base: temp.path() };
900 900
901 901 // building a revlog with a forced Node starting with zeros
902 902 // This is a corruption, but it does not preclude using the nodemap
903 903 // if we don't try to access the data
904 904 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
905 905 .unwrap();
906 906 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
907 907 .unwrap();
908 908 let entry0_bytes = IndexEntryBuilder::new()
909 909 .is_first(true)
910 910 .with_version(1)
911 911 .with_inline(true)
912 912 .with_offset(INDEX_ENTRY_SIZE)
913 913 .with_node(node0)
914 914 .build();
915 915 let entry1_bytes = IndexEntryBuilder::new()
916 916 .with_offset(INDEX_ENTRY_SIZE)
917 917 .with_node(node1)
918 918 .build();
919 919 let contents = vec![entry0_bytes, entry1_bytes]
920 920 .into_iter()
921 921 .flatten()
922 922 .collect_vec();
923 923 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
924 924
925 925 let mut idx = nodemap::tests::TestNtIndex::new();
926 926 idx.insert_node(Revision(0), node0).unwrap();
927 927 idx.insert_node(Revision(1), node1).unwrap();
928 928
929 929 let revlog =
930 930 Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap();
931 931
932 932 // accessing the data shows the corruption
933 933 revlog.get_entry(0.into()).unwrap().data().unwrap_err();
934 934
935 935 assert_eq!(
936 936 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
937 937 Revision(-1)
938 938 );
939 939 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
940 940 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));
941 941 assert_eq!(
942 942 revlog
943 943 .rev_from_node(NodePrefix::from_hex("000").unwrap())
944 944 .unwrap(),
945 945 Revision(-1)
946 946 );
947 947 assert_eq!(
948 948 revlog
949 949 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
950 950 .unwrap(),
951 951 Revision(1)
952 952 );
953 953 // RevlogError does not implement PartialEq
954 954 // (ultimately because io::Error does not)
955 955 match revlog
956 956 .rev_from_node(NodePrefix::from_hex("00").unwrap())
957 957 .expect_err("Expected to give AmbiguousPrefix error")
958 958 {
959 959 RevlogError::AmbiguousPrefix => (),
960 960 e => {
961 961 panic!("Got another error than AmbiguousPrefix: {:?}", e);
962 962 }
963 963 };
964 964 }
965 965 }
@@ -1,429 +1,429 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In the Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 11 use crate::errors::HgError;
12 12 use bytes_cast::BytesCast;
13 13 use std::fmt;
14 14
15 15 /// The length in bytes of a `Node`
16 16 ///
17 17 /// This constant is meant to ease refactors of this module, and
18 18 /// is private so that calling code does not expect all nodes to have
19 19 /// the same size, should we support several formats concurrently in
20 20 /// the future.
21 21 pub const NODE_BYTES_LENGTH: usize = 20;
22 22
23 23 /// Id of the null node.
24 24 ///
25 25 /// Used to indicate the absence of node.
26 26 pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
27 27
28 28 /// The length in bytes of a `Node`
29 29 ///
30 30 /// see also `NODE_BYTES_LENGTH` about it being private.
31 31 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
32 32
33 33 /// Default for UI presentation
34 34 const SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH: u8 = 12;
35 35
36 36 /// Private alias for readability and to ease future change
37 37 type NodeData = [u8; NODE_BYTES_LENGTH];
38 38
39 39 /// Binary revision SHA
40 40 ///
41 41 /// ## Future changes of hash size
42 42 ///
43 43 /// To accommodate future changes of hash size, Rust callers
44 44 /// should use the conversion methods at the boundaries (FFI, actual
45 45 /// computation of hashes and I/O) only, and only if required.
46 46 ///
47 47 /// All other callers outside of unit tests should just handle `Node` values
48 48 /// and never make any assumption on the actual length, using [`nybbles_len`]
49 49 /// if they need a loop boundary.
50 50 ///
51 51 /// All methods that create a `Node` either take a type that enforces
52 52 /// the size or return an error at runtime.
53 53 ///
54 54 /// [`nybbles_len`]: #method.nybbles_len
55 55 #[derive(Copy, Clone, PartialEq, BytesCast, derive_more::From)]
56 56 #[repr(transparent)]
57 57 pub struct Node {
58 58 data: NodeData,
59 59 }
60 60
61 61 impl fmt::Debug for Node {
62 62 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
63 63 let n = format!("{:x?}", self.data);
64 64 // We're using debug_tuple because it makes the output a little
65 65 // more compact without losing data.
66 66 f.debug_tuple("Node").field(&n).finish()
67 67 }
68 68 }
69 69
70 70 /// The node value for NULL_REVISION
71 71 pub const NULL_NODE: Node = Node {
72 72 data: [0; NODE_BYTES_LENGTH],
73 73 };
74 74
75 75 /// Return an error if the slice has an unexpected length
76 76 impl<'a> TryFrom<&'a [u8]> for &'a Node {
77 77 type Error = ();
78 78
79 79 #[inline]
80 80 fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
81 81 match Node::from_bytes(bytes) {
82 82 Ok((node, rest)) if rest.is_empty() => Ok(node),
83 83 _ => Err(()),
84 84 }
85 85 }
86 86 }
87 87
88 88 /// Return an error if the slice has an unexpected length
89 89 impl TryFrom<&'_ [u8]> for Node {
90 90 type Error = std::array::TryFromSliceError;
91 91
92 92 #[inline]
93 93 fn try_from(bytes: &'_ [u8]) -> Result<Self, Self::Error> {
94 94 let data = bytes.try_into()?;
95 95 Ok(Self { data })
96 96 }
97 97 }
98 98
99 99 impl From<&'_ NodeData> for Node {
100 100 #[inline]
101 101 fn from(data: &'_ NodeData) -> Self {
102 102 Self { data: *data }
103 103 }
104 104 }
105 105
106 106 impl fmt::LowerHex for Node {
107 107 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
108 108 for &byte in &self.data {
109 109 write!(f, "{:02x}", byte)?
110 110 }
111 111 Ok(())
112 112 }
113 113 }
114 114
115 115 #[derive(Debug)]
116 116 pub struct FromHexError;
117 117
118 118 /// Low level utility function, also for prefixes
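///
/// (Illustrative example, not from the original source: for `s = [0xab]`,
/// `get_nybble(s, 0)` returns `0xa` and `get_nybble(s, 1)` returns `0xb`.)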
119 119 fn get_nybble(s: &[u8], i: usize) -> u8 {
120 120 if i % 2 == 0 {
121 121 s[i / 2] >> 4
122 122 } else {
123 123 s[i / 2] & 0x0f
124 124 }
125 125 }
126 126
127 127 impl Node {
128 128 /// Retrieve the `i`th half-byte of the binary data.
129 129 ///
130 130 /// This is also the `i`th hexadecimal digit in numeric form,
131 131 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
132 132 pub fn get_nybble(&self, i: usize) -> u8 {
133 133 get_nybble(&self.data, i)
134 134 }
135 135
136 136 /// Length of the data, in nybbles
137 137 pub fn nybbles_len(&self) -> usize {
138 138 // public exposure as an instance method only, so that we can
139 139 // easily support several sizes of hashes if needed in the future.
140 140 NODE_NYBBLES_LENGTH
141 141 }
142 142
143 143 /// Convert from hexadecimal string representation
144 144 ///
145 145 /// Exact length is required.
146 146 ///
147 147 /// To be used in FFI and I/O only, in order to facilitate future
148 148 /// changes of hash format.
149 149 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, FromHexError> {
150 150 let prefix = NodePrefix::from_hex(hex)?;
151 151 if prefix.nybbles_len() == NODE_NYBBLES_LENGTH {
152 152 Ok(Self { data: prefix.data })
153 153 } else {
154 154 Err(FromHexError)
155 155 }
156 156 }
157 157
158 158 /// `from_hex`, but for input from an internal file of the repository such
159 159 /// as a changelog or manifest entry.
160 160 ///
161 161 /// An error is treated as repository corruption.
162 162 pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
163 163 Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
164 164 HgError::CorruptedRepository(format!(
165 165 "Expected a full hexadecimal node ID, found {}",
166 166 String::from_utf8_lossy(hex.as_ref())
167 167 ))
168 168 })
169 169 }
170 170
171 171 /// Provide access to binary data
172 172 ///
173 173 /// This is needed by FFI layers, for instance to return expected
174 174 /// binary values to Python.
175 175 pub fn as_bytes(&self) -> &[u8] {
176 176 &self.data
177 177 }
178 178
179 179 pub fn short(&self) -> NodePrefix {
180 180 NodePrefix {
181 181 nybbles_len: SHORT_PREFIX_DEFAULT_NYBBLES_LENGTH,
182 182 data: self.data,
183 183 }
184 184 }
185 185
186 186 pub fn pad_to_256_bits(&self) -> [u8; 32] {
187 187 let mut bits = [0; 32];
188 188 bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
189 189 bits
190 190 }
191 191 }
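
// Illustrative sketch (not part of the original changeset): `pad_to_256_bits`
// copies the current hash (`NODE_BYTES_LENGTH` bytes) into a 32-byte buffer
// and leaves the remaining bytes at zero.
#[cfg(test)]
#[test]
fn example_pad_to_256_bits() {
    let node = Node::from([0xff; NODE_BYTES_LENGTH]);
    let padded = node.pad_to_256_bits();
    // The original bytes come first, unchanged.
    assert_eq!(&padded[..NODE_BYTES_LENGTH], node.as_bytes());
    // The rest of the buffer is zero padding.
    assert!(padded[NODE_BYTES_LENGTH..].iter().all(|&byte| byte == 0));
}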
192 192
193 193 /// The beginning of a binary revision SHA.
194 194 ///
195 195 /// Since it can potentially come from a hexadecimal representation with
196 196 /// odd length, it needs to carry around whether the last 4 bits are relevant
197 197 /// or not.
198 198 #[derive(Debug, PartialEq, Copy, Clone)]
199 199 pub struct NodePrefix {
200 200 /// In `1..=NODE_NYBBLES_LENGTH`
201 201 nybbles_len: u8,
202 202 /// The first `4 * length_in_nybbles` bits are used (considering bits
203 203 /// within a byte in big-endian: most significant first), the rest
204 204 /// are zero.
205 205 data: NodeData,
206 206 }
207 207
208 208 impl NodePrefix {
209 209 /// Convert from hexadecimal string representation
210 210 ///
211 211 /// Similarly to `hex::decode`, can be used with Unicode string types
212 212 /// (`String`, `&str`) as well as bytes.
213 213 ///
214 214 /// To be used in FFI and I/O only, in order to facilitate future
215 215 /// changes of hash format.
216 216 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, FromHexError> {
217 217 let hex = hex.as_ref();
218 218 let len = hex.len();
219 219 if len > NODE_NYBBLES_LENGTH || len == 0 {
220 220 return Err(FromHexError);
221 221 }
222 222
223 223 let mut data = [0; NODE_BYTES_LENGTH];
224 224 let mut nybbles_len = 0;
225 225 for &ascii_byte in hex {
226 226 let nybble = match char::from(ascii_byte).to_digit(16) {
227 227 Some(digit) => digit as u8,
228 228 None => return Err(FromHexError),
229 229 };
230 230 // Fill in the upper half of a byte first, then the lower half.
231 231 let shift = if nybbles_len % 2 == 0 { 4 } else { 0 };
232 232 data[nybbles_len as usize / 2] |= nybble << shift;
233 233 nybbles_len += 1;
234 234 }
235 235 Ok(Self { data, nybbles_len })
236 236 }
237 237
238 238 pub fn nybbles_len(&self) -> usize {
239 239 self.nybbles_len as _
240 240 }
241 241
242 242 pub fn is_prefix_of(&self, node: &Node) -> bool {
243 243 let full_bytes = self.nybbles_len() / 2;
244 244 if self.data[..full_bytes] != node.data[..full_bytes] {
245 245 return false;
246 246 }
247 247 if self.nybbles_len() % 2 == 0 {
248 248 return true;
249 249 }
250 250 let last = self.nybbles_len() - 1;
251 251 self.get_nybble(last) == node.get_nybble(last)
252 252 }
253 253
254 254 /// Retrieve the `i`th half-byte from the prefix.
255 255 ///
256 256 /// This is also the `i`th hexadecimal digit in numeric form,
257 257 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
258 258 pub fn get_nybble(&self, i: usize) -> u8 {
259 259 assert!(i < self.nybbles_len());
260 260 get_nybble(&self.data, i)
261 261 }
262 262
263 263 fn iter_nybbles(&self) -> impl Iterator<Item = u8> + '_ {
264 264 (0..self.nybbles_len()).map(move |i| get_nybble(&self.data, i))
265 265 }
266 266
267 267 /// Return the index of the first nybble that's different from `node`
268 268 ///
269 269 /// If the return value is `None`, then `self` is a prefix of `node`.
270 270 /// Note that this method is a bit slower than `is_prefix_of` for that
271 271 /// check alone.
272 272 ///
273 273 /// Returned index is as in `get_nybble`, i.e., starting at 0.
274 274 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
275 275 self.iter_nybbles()
276 276 .zip(NodePrefix::from(*node).iter_nybbles())
277 277 .position(|(a, b)| a != b)
278 278 }
279 279 }
280 280
281 281 impl fmt::LowerHex for NodePrefix {
282 282 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
283 283 let full_bytes = self.nybbles_len() / 2;
284 284 for &byte in &self.data[..full_bytes] {
285 285 write!(f, "{:02x}", byte)?
286 286 }
287 287 if self.nybbles_len() % 2 == 1 {
288 288 let last = self.nybbles_len() - 1;
289 289 write!(f, "{:x}", self.get_nybble(last))?
290 290 }
291 291 Ok(())
292 292 }
293 293 }
294 294
295 295 /// A shortcut for full `Node` references
296 296 impl From<&'_ Node> for NodePrefix {
297 297 fn from(node: &'_ Node) -> Self {
298 298 NodePrefix {
299 299 nybbles_len: node.nybbles_len() as _,
300 300 data: node.data,
301 301 }
302 302 }
303 303 }
304 304
305 305 /// A shortcut for full `Node` references
306 306 impl From<Node> for NodePrefix {
307 307 fn from(node: Node) -> Self {
308 308 NodePrefix {
309 309 nybbles_len: node.nybbles_len() as _,
310 310 data: node.data,
311 311 }
312 312 }
313 313 }
314 314
315 315 impl PartialEq<Node> for NodePrefix {
316 316 fn eq(&self, other: &Node) -> bool {
317 317 self.data == other.data && self.nybbles_len() == other.nybbles_len()
318 318 }
319 319 }
320 320
321 321 #[cfg(test)]
322 322 mod tests {
323 323 use super::*;
324 324
325 325 const SAMPLE_NODE_HEX: &str = "0123456789abcdeffedcba9876543210deadbeef";
326 326 const SAMPLE_NODE: Node = Node {
327 327 data: [
328 328 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
329 329 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
330 330 ],
331 331 };
332 332
333 333 /// Pad a hexadecimal string to reach `NODE_NYBBLES_LENGTH`
334 334 /// The padding is made with zeros.
335 335 pub fn hex_pad_right(hex: &str) -> String {
336 336 let mut res = hex.to_string();
337 337 while res.len() < NODE_NYBBLES_LENGTH {
338 338 res.push('0');
339 339 }
340 340 res
341 341 }
342 342
343 343 #[test]
344 344 fn test_node_from_hex() {
345 345 let not_hex = "012... oops";
346 346 let too_short = "0123";
347 347 let too_long = format!("{}0", SAMPLE_NODE_HEX);
348 348 assert_eq!(Node::from_hex(SAMPLE_NODE_HEX).unwrap(), SAMPLE_NODE);
349 349 assert!(Node::from_hex(not_hex).is_err());
350 350 assert!(Node::from_hex(too_short).is_err());
351 assert!(Node::from_hex(&too_long).is_err());
351 assert!(Node::from_hex(too_long).is_err());
352 352 }
353 353
354 354 #[test]
355 355 fn test_node_encode_hex() {
356 356 assert_eq!(format!("{:x}", SAMPLE_NODE), SAMPLE_NODE_HEX);
357 357 }
358 358
359 359 #[test]
360 360 fn test_prefix_from_to_hex() -> Result<(), FromHexError> {
361 361 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1")?), "0e1");
362 362 assert_eq!(format!("{:x}", NodePrefix::from_hex("0e1a")?), "0e1a");
363 363 assert_eq!(
364 364 format!("{:x}", NodePrefix::from_hex(SAMPLE_NODE_HEX)?),
365 365 SAMPLE_NODE_HEX
366 366 );
367 367 Ok(())
368 368 }
369 369
370 370 #[test]
371 371 fn test_prefix_from_hex_errors() {
372 372 assert!(NodePrefix::from_hex("testgr").is_err());
373 373 let mut long = format!("{:x}", NULL_NODE);
374 374 long.push('c');
375 375 assert!(NodePrefix::from_hex(&long).is_err())
376 376 }
377 377
378 378 #[test]
379 379 fn test_is_prefix_of() -> Result<(), FromHexError> {
380 380 let mut node_data = [0; NODE_BYTES_LENGTH];
381 381 node_data[0] = 0x12;
382 382 node_data[1] = 0xca;
383 383 let node = Node::from(node_data);
384 384 assert!(NodePrefix::from_hex("12")?.is_prefix_of(&node));
385 385 assert!(!NodePrefix::from_hex("1a")?.is_prefix_of(&node));
386 386 assert!(NodePrefix::from_hex("12c")?.is_prefix_of(&node));
387 387 assert!(!NodePrefix::from_hex("12d")?.is_prefix_of(&node));
388 388 Ok(())
389 389 }
390 390
391 391 #[test]
392 392 fn test_get_nybble() -> Result<(), FromHexError> {
393 393 let prefix = NodePrefix::from_hex("dead6789cafe")?;
394 394 assert_eq!(prefix.get_nybble(0), 13);
395 395 assert_eq!(prefix.get_nybble(7), 9);
396 396 Ok(())
397 397 }
398 398
399 399 #[test]
400 400 fn test_first_different_nybble_even_prefix() {
401 401 let prefix = NodePrefix::from_hex("12ca").unwrap();
402 402 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
403 403 assert_eq!(prefix.first_different_nybble(&node), Some(0));
404 404 node.data[0] = 0x13;
405 405 assert_eq!(prefix.first_different_nybble(&node), Some(1));
406 406 node.data[0] = 0x12;
407 407 assert_eq!(prefix.first_different_nybble(&node), Some(2));
408 408 node.data[1] = 0xca;
409 409 // now it is a prefix
410 410 assert_eq!(prefix.first_different_nybble(&node), None);
411 411 }
412 412
413 413 #[test]
414 414 fn test_first_different_nybble_odd_prefix() {
415 415 let prefix = NodePrefix::from_hex("12c").unwrap();
416 416 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
417 417 assert_eq!(prefix.first_different_nybble(&node), Some(0));
418 418 node.data[0] = 0x13;
419 419 assert_eq!(prefix.first_different_nybble(&node), Some(1));
420 420 node.data[0] = 0x12;
421 421 assert_eq!(prefix.first_different_nybble(&node), Some(2));
422 422 node.data[1] = 0xca;
423 423 // now it is a prefix
424 424 assert_eq!(prefix.first_different_nybble(&node), None);
425 425 }
426 426 }
427 427
428 428 #[cfg(test)]
429 429 pub use tests::hex_pad_right;
@@ -1,1102 +1,1108 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use crate::UncheckedRevision;
16 16
17 17 use super::{
18 18 node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION,
19 19 };
20 20
21 21 use bytes_cast::{unaligned, BytesCast};
22 22 use std::cmp::max;
23 23 use std::fmt;
24 24 use std::mem::{self, align_of, size_of};
25 25 use std::ops::Deref;
26 26 use std::ops::Index;
27 27
28 28 #[derive(Debug, PartialEq)]
29 29 pub enum NodeMapError {
30 30 /// A `NodePrefix` matches several [`Revision`]s.
31 31 ///
32 32 /// This can be returned by methods meant for (at most) one match.
33 33 MultipleResults,
34 34 /// A `Revision` stored in the nodemap could not be found in the index
35 35 RevisionNotInIndex(UncheckedRevision),
36 36 }
37 37
38 38 /// Mapping system from Mercurial nodes to revision numbers.
39 39 ///
40 40 /// ## `RevlogIndex` and `NodeMap`
41 41 ///
42 42 /// One way to think about their relationship is that
43 43 /// the `NodeMap` is a prefix-oriented reverse index of the [`Node`]
44 44 /// information carried by a [`RevlogIndex`].
45 45 ///
46 46 /// Many of the methods in this trait take a `RevlogIndex` argument
47 47 /// which is used for validation of their results. This index must naturally
48 48 /// be the one the `NodeMap` is about, and it must be consistent.
49 49 ///
50 50 /// Notably, the `NodeMap` must not store
51 51 /// information about more `Revision` values than there are in the index.
52 52 /// In these methods, if an encountered `Revision` is not in the index, a
53 53 /// [RevisionNotInIndex](NodeMapError) error is returned.
54 54 ///
55 55 /// In insert operations, the rule is thus that the `NodeMap` must always
56 56 /// be updated after the `RevlogIndex` it is about.
57 57 pub trait NodeMap {
58 58 /// Find the unique `Revision` having the given `Node`
59 59 ///
60 60 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
61 61 fn find_node(
62 62 &self,
63 63 index: &impl RevlogIndex,
64 64 node: &Node,
65 65 ) -> Result<Option<Revision>, NodeMapError> {
66 66 self.find_bin(index, node.into())
67 67 }
68 68
69 69 /// Find the unique Revision whose `Node` starts with a given binary prefix
70 70 ///
71 71 /// If no Revision matches the given prefix, `Ok(None)` is returned.
72 72 ///
73 73 /// If several Revisions match the given prefix, a
74 74 /// [MultipleResults](NodeMapError) error is returned.
75 75 fn find_bin(
76 76 &self,
77 77 idx: &impl RevlogIndex,
78 78 prefix: NodePrefix,
79 79 ) -> Result<Option<Revision>, NodeMapError>;
80 80
81 81 /// Give the size of the shortest node prefix that determines
82 82 /// the revision uniquely.
83 83 ///
84 84 /// From a binary node prefix, if it is matched in the node map, this
85 85 /// returns the number of hexadecimal digits that would have sufficed
86 86 /// to find the revision uniquely.
87 87 ///
88 88 /// Returns `None` if no [`Revision`] could be found for the prefix.
89 89 ///
90 90 /// If several Revisions match the given prefix, a
91 91 /// [MultipleResults](NodeMapError) error is returned.
92 92 fn unique_prefix_len_bin(
93 93 &self,
94 94 idx: &impl RevlogIndex,
95 95 node_prefix: NodePrefix,
96 96 ) -> Result<Option<usize>, NodeMapError>;
97 97
98 98 /// Same as [unique_prefix_len_bin](Self::unique_prefix_len_bin), with
99 99 /// a full [`Node`] as input
100 100 fn unique_prefix_len_node(
101 101 &self,
102 102 idx: &impl RevlogIndex,
103 103 node: &Node,
104 104 ) -> Result<Option<usize>, NodeMapError> {
105 105 self.unique_prefix_len_bin(idx, node.into())
106 106 }
107 107 }
108 108
109 109 pub trait MutableNodeMap: NodeMap {
110 110 fn insert<I: RevlogIndex>(
111 111 &mut self,
112 112 index: &I,
113 113 node: &Node,
114 114 rev: Revision,
115 115 ) -> Result<(), NodeMapError>;
116 116 }
117 117
118 118 /// Low level NodeTree [`Block`] elements
119 119 ///
120 120 /// These are stored exactly as they are, for instance on persistent storage.
121 121 type RawElement = unaligned::I32Be;
122 122
123 123 /// High level representation of values in NodeTree
124 124 /// [`Blocks`](struct.Block.html)
125 125 ///
126 126 /// This is the high level representation that most algorithms should
127 127 /// use.
128 128 #[derive(Clone, Debug, Eq, PartialEq)]
129 129 enum Element {
130 130 // This is not a Mercurial revision. It's an `i32` because this is the
131 131 // right type for this structure.
132 132 Rev(i32),
133 133 Block(usize),
134 134 None,
135 135 }
136 136
137 137 impl From<RawElement> for Element {
138 138 /// Conversion from low level representation, after endianness conversion.
139 139 ///
140 140 /// See [`Block`](struct.Block.html) for explanation about the encoding.
141 141 fn from(raw: RawElement) -> Element {
142 142 let int = raw.get();
143 143 if int >= 0 {
144 144 Element::Block(int as usize)
145 145 } else if int == -1 {
146 146 Element::None
147 147 } else {
148 148 Element::Rev(-int - 2)
149 149 }
150 150 }
151 151 }
152 152
153 153 impl From<Element> for RawElement {
154 154 fn from(element: Element) -> RawElement {
155 155 RawElement::from(match element {
156 156 Element::None => -1,
157 157 Element::Block(i) => i as i32,
158 158 Element::Rev(rev) => -rev - 2,
159 159 })
160 160 }
161 161 }
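
// Illustrative sketch (not part of the original changeset): the two
// conversions above are inverses of each other. Block indices stay >= 0,
// -1 is the "absent" marker, and revision `r` is stored as `-r - 2`.
#[cfg(test)]
#[test]
fn example_element_encoding_roundtrip() {
    let elements = [
        Element::None,
        Element::Block(0),
        Element::Block(7),
        Element::Rev(0),
        Element::Rev(5),
    ];
    for element in elements.iter() {
        let raw = RawElement::from(element.clone());
        assert_eq!(Element::from(raw), *element);
    }
    // Concrete encodings: -1 means "absent", -2 encodes revision 0.
    assert_eq!(RawElement::from(Element::None).get(), -1);
    assert_eq!(RawElement::from(Element::Rev(0)).get(), -2);
}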
162 162
163 163 const ELEMENTS_PER_BLOCK: usize = 16; // number of different values in a nybble
164 164
165 165 /// A logical block of the [`NodeTree`], packed with a fixed size.
166 166 ///
167 167 /// These are always used in container types implementing `Index<Block>`,
169 169 /// such as `&[Block]`
169 169 ///
170 170 /// As an array of integers, its `i`th element encodes the `i`th potential
171 171 /// edge from the block, i.e. the edge followed for the hexadecimal digit
172 172 /// (nybble) `i`. Each element is either:
173 173 ///
174 174 /// - absent (value -1)
175 175 /// - another `Block` in the same indexable container (value ≥ 0)
176 176 /// - a [`Revision`] leaf (value ≤ -2)
177 177 ///
178 178 /// Endianness has to be fixed for consistency on shared storage across
179 179 /// different architectures.
180 180 ///
181 181 /// A key difference with the C `nodetree` is that we need to be
182 182 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
183 183 /// rather than 0, and the [`Revision`] range upper limit is -2 instead of -1.
184 184 ///
185 185 /// Another related difference is that `NULL_REVISION` (-1) is not
186 186 /// represented at all, because we want an immutable empty nodetree
187 187 /// to be valid.
188 188 #[derive(Copy, Clone, BytesCast, PartialEq)]
189 189 #[repr(transparent)]
190 190 pub struct Block([RawElement; ELEMENTS_PER_BLOCK]);
191 191
192 192 impl Block {
193 193 fn new() -> Self {
194 194 let absent_node = RawElement::from(-1);
195 195 Block([absent_node; ELEMENTS_PER_BLOCK])
196 196 }
197 197
198 198 fn get(&self, nybble: u8) -> Element {
199 199 self.0[nybble as usize].into()
200 200 }
201 201
202 202 fn set(&mut self, nybble: u8, element: Element) {
203 203 self.0[nybble as usize] = element.into()
204 204 }
205 205 }
206 206
207 207 impl fmt::Debug for Block {
208 208 /// sparse representation for testing and debugging purposes
209 209 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
210 210 f.debug_map()
211 211 .entries((0..16).filter_map(|i| match self.get(i) {
212 212 Element::None => None,
213 213 element => Some((i, element)),
214 214 }))
215 215 .finish()
216 216 }
217 217 }
218 218
219 219 /// A mutable 16-radix tree with the root block logically at the end
220 220 ///
221 221 /// Because of the append only nature of our node trees, we need to
222 222 /// keep the original untouched and store new blocks separately.
223 223 ///
224 224 /// The mutable root [`Block`] is kept apart so that we don't have to rebump
225 225 /// it on each insertion.
226 226 pub struct NodeTree {
227 227 readonly: Box<dyn Deref<Target = [Block]> + Send>,
228 228 growable: Vec<Block>,
229 229 root: Block,
230 230 masked_inner_blocks: usize,
231 231 }
232 232
233 233 impl Index<usize> for NodeTree {
234 234 type Output = Block;
235 235
236 236 fn index(&self, i: usize) -> &Block {
237 237 let ro_len = self.readonly.len();
238 238 if i < ro_len {
239 239 &self.readonly[i]
240 240 } else if i == ro_len + self.growable.len() {
241 241 &self.root
242 242 } else {
243 243 &self.growable[i - ro_len]
244 244 }
245 245 }
246 246 }
247 247
248 248 /// Return `None` unless the [`Node`] for `rev` has the given prefix in `idx`.
249 249 fn has_prefix_or_none(
250 250 idx: &impl RevlogIndex,
251 251 prefix: NodePrefix,
252 252 rev: UncheckedRevision,
253 253 ) -> Result<Option<Revision>, NodeMapError> {
254 254 match idx.check_revision(rev) {
255 255 Some(checked) => idx
256 256 .node(checked)
257 257 .ok_or(NodeMapError::RevisionNotInIndex(rev))
258 258 .map(|node| {
259 259 if prefix.is_prefix_of(node) {
260 260 Some(checked)
261 261 } else {
262 262 None
263 263 }
264 264 }),
265 265 None => Err(NodeMapError::RevisionNotInIndex(rev)),
266 266 }
267 267 }
268 268
269 269 /// Validate that the candidate's node indeed starts with the given prefix,
270 270 /// and handle ambiguities related to [`NULL_REVISION`].
271 271 ///
272 272 /// From the data in the NodeTree, one can only conclude that some
273 273 /// revision is the only one for a *subprefix* of the one being looked up.
274 274 fn validate_candidate(
275 275 idx: &impl RevlogIndex,
276 276 prefix: NodePrefix,
277 277 candidate: (Option<UncheckedRevision>, usize),
278 278 ) -> Result<(Option<Revision>, usize), NodeMapError> {
279 279 let (rev, steps) = candidate;
280 280 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
281 281 rev.map_or(Ok((None, steps)), |r| {
282 282 has_prefix_or_none(idx, prefix, r)
283 283 .map(|opt| (opt, max(steps, nz_nybble + 1)))
284 284 })
285 285 } else {
286 286 // the prefix is only made of zeros; NULL_REVISION always matches it
287 287 // and any other *valid* result is an ambiguity
288 288 match rev {
289 289 None => Ok((Some(NULL_REVISION), steps + 1)),
290 290 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
291 291 None => Ok((Some(NULL_REVISION), steps + 1)),
292 292 _ => Err(NodeMapError::MultipleResults),
293 293 },
294 294 }
295 295 }
296 296 }
297 297
298 298 impl NodeTree {
299 299 /// Initialize a `NodeTree` from an immutable slice-like container of `Block`s
300 300 ///
301 301 /// We keep `readonly` and clone its root block if it isn't empty.
302 302 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
303 303 let root = readonly.last().cloned().unwrap_or_else(Block::new);
304 304 NodeTree {
305 305 readonly,
306 306 growable: Vec::new(),
307 307 root,
308 308 masked_inner_blocks: 0,
309 309 }
310 310 }
311 311
312 312 /// Create from an opaque bunch of bytes
313 313 ///
314 314 /// The [`NodeTreeBytes`] is created from `bytes`,
315 315 /// of which exactly `amount` bytes are used.
316 316 ///
317 317 /// - `bytes` could be derived from `PyBuffer` and `Mmap` objects.
318 318 /// - `amount` is expressed in bytes, and is not automatically derived from
319 319 /// `bytes`, so that a caller that manages them atomically can perform
320 320 /// temporary disk serializations and still rollback easily if needed.
321 321 /// First use-case for this would be to support Mercurial shell hooks.
322 322 ///
323 323 /// Panics if `bytes` is smaller than `amount`.
324 324 pub fn load_bytes(
325 325 bytes: Box<dyn Deref<Target = [u8]> + Send>,
326 326 amount: usize,
327 327 ) -> Self {
328 328 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
329 329 }
330 330
331 331 /// Retrieve added [`Block`]s and the original immutable data
332 332 pub fn into_readonly_and_added(
333 333 self,
334 334 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
335 335 let mut vec = self.growable;
336 336 let readonly = self.readonly;
337 337 if readonly.last() != Some(&self.root) {
338 338 vec.push(self.root);
339 339 }
340 340 (readonly, vec)
341 341 }
342 342
343 343 /// Retrieve added [`Block`]s as bytes, ready to be written to persistent
344 344 /// storage
345 345 pub fn into_readonly_and_added_bytes(
346 346 self,
347 347 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
348 348 let (readonly, vec) = self.into_readonly_and_added();
349 349 // Prevent running `v`'s destructor so we are in complete control
350 350 // of the allocation.
351 351 let vec = mem::ManuallyDrop::new(vec);
352 352
353 353 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
354 354 // bytes, so this is perfectly safe.
355 355 let bytes = unsafe {
356 356 // Check for compatible allocation layout.
357 357 // (Optimized away by constant-folding + dead code elimination.)
358 358 assert_eq!(size_of::<Block>(), 64);
359 359 assert_eq!(align_of::<Block>(), 1);
360 360
361 361 // /!\ Any use of `vec` after this is use-after-free.
362 362 // TODO: use `into_raw_parts` once stabilized
363 363 Vec::from_raw_parts(
364 364 vec.as_ptr() as *mut u8,
365 365 vec.len() * size_of::<Block>(),
366 366 vec.capacity() * size_of::<Block>(),
367 367 )
368 368 };
369 369 (readonly, bytes)
370 370 }
371 371
372 372 /// Total number of blocks
373 373 fn len(&self) -> usize {
374 374 self.readonly.len() + self.growable.len() + 1
375 375 }
376 376
377 377 /// Implemented for completeness
378 378 ///
379 379 /// A `NodeTree` always has at least the mutable root block.
380 380 #[allow(dead_code)]
381 381 fn is_empty(&self) -> bool {
382 382 false
383 383 }
384 384
385 385 /// Main working method for `NodeTree` searches
386 386 ///
387 387 /// The first returned value is the result of analysing `NodeTree` data
388 388 /// *alone*: whereas `None` guarantees that the given prefix is absent
389 389 /// from the [`NodeTree`] data (but still could match [`NULL_NODE`]), with
390 390 /// `Some(rev)`, it is to be understood that `rev` is the unique
391 391 /// [`Revision`] that could match the prefix. Actually, all that can
392 392 /// be inferred from
393 393 /// the `NodeTree` data is that `rev` is the revision with the longest
394 394 /// common node prefix with the given prefix.
395 395 /// We return an [`UncheckedRevision`] because we have no guarantee that
396 396 /// the revision we found is valid for the index.
397 397 ///
398 398 /// The second returned value is the size of the smallest subprefix
399 399 /// of `prefix` that would give the same result, i.e. not the
400 400 /// [MultipleResults](NodeMapError) error variant (again, using only the
401 401 /// data of the [`NodeTree`]).
402 402 fn lookup(
403 403 &self,
404 404 prefix: NodePrefix,
405 405 ) -> Result<(Option<UncheckedRevision>, usize), NodeMapError> {
406 406 for (i, visit_item) in self.visit(prefix).enumerate() {
407 407 if let Some(opt) = visit_item.final_revision() {
408 408 return Ok((opt, i + 1));
409 409 }
410 410 }
411 411 Err(NodeMapError::MultipleResults)
412 412 }
413 413
414 414 fn visit(&self, prefix: NodePrefix) -> NodeTreeVisitor {
415 415 NodeTreeVisitor {
416 416 nt: self,
417 417 prefix,
418 418 visit: self.len() - 1,
419 419 nybble_idx: 0,
420 420 done: false,
421 421 }
422 422 }
423 423 /// Return a mutable reference to the `Block` at index `idx`.
424 424 ///
425 425 /// If `idx` lies in the immutable area, then the reference is to
426 426 /// a newly appended copy.
427 427 ///
428 428 /// Returns `(new_idx, mut_ref, glen)` where
429 429 ///
430 430 /// - `new_idx` is the index of the mutable `Block`
431 431 /// - `mut_ref` is a mutable reference to the mutable Block.
432 432 /// - `glen` is the new length of `self.growable`
433 433 ///
434 434 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
435 435 /// itself because of the mutable borrow taken with the returned `Block`
436 436 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
437 437 let ro_blocks = &self.readonly;
438 438 let ro_len = ro_blocks.len();
439 439 let glen = self.growable.len();
440 440 if idx < ro_len {
441 441 self.masked_inner_blocks += 1;
442 442 self.growable.push(ro_blocks[idx]);
443 443 (glen + ro_len, &mut self.growable[glen], glen + 1)
444 444 } else if glen + ro_len == idx {
445 445 (idx, &mut self.root, glen)
446 446 } else {
447 447 (idx, &mut self.growable[idx - ro_len], glen)
448 448 }
449 449 }
450 450
451 451 /// Main insertion method
452 452 ///
453 453 /// This will dive in the node tree to find the deepest `Block` for
454 454 /// `node`, split it as much as needed and record `node` in there.
455 455 /// The method then backtracks, updating references in all the visited
456 456 /// blocks from the root.
457 457 ///
458 458 /// All the mutated `Block` are copied first to the growable part if
459 459 /// needed. That happens for those in the immutable part except the root.
460 460 pub fn insert<I: RevlogIndex>(
461 461 &mut self,
462 462 index: &I,
463 463 node: &Node,
464 464 rev: Revision,
465 465 ) -> Result<(), NodeMapError> {
466 466 let ro_len = &self.readonly.len();
467 467
468 468 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
469 469 let read_nybbles = visit_steps.len();
470 470 // visit_steps cannot be empty, since we always visit the root block
471 471 let deepest = visit_steps.pop().unwrap();
472 472
473 473 let (mut block_idx, mut block, mut glen) =
474 474 self.mutable_block(deepest.block_idx);
475 475
476 476 if let Element::Rev(old_rev) = deepest.element {
477 477 let old_node = index
478 478 .check_revision(old_rev.into())
479 479 .and_then(|rev| index.node(rev))
480 480 .ok_or_else(|| {
481 481 NodeMapError::RevisionNotInIndex(old_rev.into())
482 482 })?;
483 483 if old_node == node {
484 484 return Ok(()); // avoid creating lots of useless blocks
485 485 }
486 486
487 487 // Looping over the tail of nybbles in both nodes, creating
488 488 // new blocks until we find the difference
489 489 let mut new_block_idx = ro_len + glen;
490 490 let mut nybble = deepest.nybble;
491 491 for nybble_pos in read_nybbles..node.nybbles_len() {
492 492 block.set(nybble, Element::Block(new_block_idx));
493 493
494 494 let new_nybble = node.get_nybble(nybble_pos);
495 495 let old_nybble = old_node.get_nybble(nybble_pos);
496 496
497 497 if old_nybble == new_nybble {
498 498 self.growable.push(Block::new());
499 499 block = &mut self.growable[glen];
500 500 glen += 1;
501 501 new_block_idx += 1;
502 502 nybble = new_nybble;
503 503 } else {
504 504 let mut new_block = Block::new();
505 505 new_block.set(old_nybble, Element::Rev(old_rev));
506 506 new_block.set(new_nybble, Element::Rev(rev.0));
507 507 self.growable.push(new_block);
508 508 break;
509 509 }
510 510 }
511 511 } else {
512 512 // Free slot in the deepest block: no splitting has to be done
513 513 block.set(deepest.nybble, Element::Rev(rev.0));
514 514 }
515 515
516 516 // Backtrack over visit steps to update references
517 517 while let Some(visited) = visit_steps.pop() {
518 518 let to_write = Element::Block(block_idx);
519 519 if visit_steps.is_empty() {
520 520 self.root.set(visited.nybble, to_write);
521 521 break;
522 522 }
523 523 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
524 524 if block.get(visited.nybble) == to_write {
525 525 break;
526 526 }
527 527 block.set(visited.nybble, to_write);
528 528 block_idx = new_idx;
529 529 }
530 530 Ok(())
531 531 }
532 532
533 533 /// Make the whole `NodeTree` logically empty, without touching the
534 534 /// immutable part.
535 535 pub fn invalidate_all(&mut self) {
536 536 self.root = Block::new();
537 537 self.growable = Vec::new();
538 538 self.masked_inner_blocks = self.readonly.len();
539 539 }
540 540
541 541 /// Return the number of blocks in the readonly part that are currently
542 542 /// masked in the mutable part.
543 543 ///
544 544 /// The `NodeTree` structure has no efficient way to know how many blocks
545 545 /// are already unreachable in the readonly part.
546 546 ///
547 547 /// After a call to `invalidate_all()`, the returned number can actually be
548 548 /// bigger than the whole readonly part: this is the conventional way to
549 549 /// signal that all the readonly blocks have been masked. That is what the
550 550 /// caller really needs to know, and it does not require tracking how many
551 551 /// blocks were actually unreachable to begin with.
552 552 pub fn masked_readonly_blocks(&self) -> usize {
553 553 if let Some(readonly_root) = self.readonly.last() {
554 554 if readonly_root == &self.root {
555 555 return 0;
556 556 }
557 557 } else {
558 558 return 0;
559 559 }
560 560 self.masked_inner_blocks + 1
561 561 }
562 562 }
563 563
564 564 pub struct NodeTreeBytes {
565 565 buffer: Box<dyn Deref<Target = [u8]> + Send>,
566 566 len_in_blocks: usize,
567 567 }
568 568
569 569 impl NodeTreeBytes {
570 570 fn new(
571 571 buffer: Box<dyn Deref<Target = [u8]> + Send>,
572 572 amount: usize,
573 573 ) -> Self {
574 574 assert!(buffer.len() >= amount);
575 575 let len_in_blocks = amount / size_of::<Block>();
576 576 NodeTreeBytes {
577 577 buffer,
578 578 len_in_blocks,
579 579 }
580 580 }
581 581 }
582 582
583 583 impl Deref for NodeTreeBytes {
584 584 type Target = [Block];
585 585
586 586 fn deref(&self) -> &[Block] {
587 587 Block::slice_from_bytes(&self.buffer, self.len_in_blocks)
588 588 // `NodeTreeBytes::new` already asserted that `self.buffer` is
589 589 // large enough.
590 590 .unwrap()
591 591 .0
592 592 }
593 593 }
594 594
595 595 struct NodeTreeVisitor<'n> {
596 596 nt: &'n NodeTree,
597 597 prefix: NodePrefix,
598 598 visit: usize,
599 599 nybble_idx: usize,
600 600 done: bool,
601 601 }
602 602
603 603 #[derive(Debug, PartialEq, Clone)]
604 604 struct NodeTreeVisitItem {
605 605 block_idx: usize,
606 606 nybble: u8,
607 607 element: Element,
608 608 }
609 609
610 610 impl<'n> Iterator for NodeTreeVisitor<'n> {
611 611 type Item = NodeTreeVisitItem;
612 612
613 613 fn next(&mut self) -> Option<Self::Item> {
614 614 if self.done || self.nybble_idx >= self.prefix.nybbles_len() {
615 615 return None;
616 616 }
617 617
618 618 let nybble = self.prefix.get_nybble(self.nybble_idx);
619 619 self.nybble_idx += 1;
620 620
621 621 let visit = self.visit;
622 622 let element = self.nt[visit].get(nybble);
623 623 if let Element::Block(idx) = element {
624 624 self.visit = idx;
625 625 } else {
626 626 self.done = true;
627 627 }
628 628
629 629 Some(NodeTreeVisitItem {
630 630 block_idx: visit,
631 631 nybble,
632 632 element,
633 633 })
634 634 }
635 635 }
636 636
637 637 impl NodeTreeVisitItem {
638 638 // Return `Some(opt)` if this item is final, with `opt` being the
639 639 // `UncheckedRevision` that it may represent.
640 640 //
641 641 // If the item is not terminal, return `None`
642 642 fn final_revision(&self) -> Option<Option<UncheckedRevision>> {
643 643 match self.element {
644 644 Element::Block(_) => None,
645 645 Element::Rev(r) => Some(Some(r.into())),
646 646 Element::None => Some(None),
647 647 }
648 648 }
649 649 }
650 650
651 651 impl From<Vec<Block>> for NodeTree {
652 652 fn from(vec: Vec<Block>) -> Self {
653 653 Self::new(Box::new(vec))
654 654 }
655 655 }
656 656
657 657 impl fmt::Debug for NodeTree {
658 658 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
659 let readonly: &[Block] = &*self.readonly;
659 let readonly: &[Block] = &self.readonly;
660 660 write!(
661 661 f,
662 662 "readonly: {:?}, growable: {:?}, root: {:?}",
663 663 readonly, self.growable, self.root
664 664 )
665 665 }
666 666 }
667 667
668 668 impl Default for NodeTree {
669 669 /// Create a fully mutable empty NodeTree
670 670 fn default() -> Self {
671 NodeTree::new(Box::new(Vec::new()))
671 NodeTree::new(Box::<Vec<_>>::default())
672 672 }
673 673 }
674 674
675 675 impl NodeMap for NodeTree {
676 676 fn find_bin<'a>(
677 677 &self,
678 678 idx: &impl RevlogIndex,
679 679 prefix: NodePrefix,
680 680 ) -> Result<Option<Revision>, NodeMapError> {
681 681 validate_candidate(idx, prefix, self.lookup(prefix)?)
682 682 .map(|(opt, _shortest)| opt)
683 683 }
684 684
685 685 fn unique_prefix_len_bin<'a>(
686 686 &self,
687 687 idx: &impl RevlogIndex,
688 688 prefix: NodePrefix,
689 689 ) -> Result<Option<usize>, NodeMapError> {
690 690 validate_candidate(idx, prefix, self.lookup(prefix)?)
691 691 .map(|(opt, shortest)| opt.map(|_rev| shortest))
692 692 }
693 693 }
694 694
695 695 #[cfg(test)]
696 696 pub mod tests {
697 697 use super::NodeMapError::*;
698 698 use super::*;
699 699 use crate::revlog::node::{hex_pad_right, Node};
700 700 use std::collections::HashMap;
701 701
702 702 /// Creates a `Block` using a syntax close to the `Debug` output
703 703 macro_rules! block {
704 704 {$($nybble:tt : $variant:ident($val:tt)),*} => (
705 705 {
706 706 let mut block = Block::new();
707 707 $(block.set($nybble, Element::$variant($val)));*;
708 708 block
709 709 }
710 710 )
711 711 }
712 712
713 713 /// Shorthand to reduce boilerplate when creating [`Revision`] for testing
714 714 macro_rules! R {
715 715 ($revision:literal) => {
716 716 Revision($revision)
717 717 };
718 718 }
719 719
720 720 #[test]
721 721 fn test_block_debug() {
722 722 let mut block = Block::new();
723 723 block.set(1, Element::Rev(3));
724 724 block.set(10, Element::Block(0));
725 725 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
726 726 }
727 727
728 728 #[test]
729 729 fn test_block_macro() {
730 730 let block = block! {5: Block(2)};
731 731 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
732 732
733 733 let block = block! {13: Rev(15), 5: Block(2)};
734 734 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
735 735 }
736 736
737 737 #[test]
738 738 fn test_raw_block() {
739 739 let mut raw = [255u8; 64];
740 740
741 741 let mut counter = 0;
742 742 for val in [0_i32, 15, -2, -1, -3].iter() {
743 743 for byte in val.to_be_bytes().iter() {
744 744 raw[counter] = *byte;
745 745 counter += 1;
746 746 }
747 747 }
748 748 let (block, _) = Block::from_bytes(&raw).unwrap();
749 749 assert_eq!(block.get(0), Element::Block(0));
750 750 assert_eq!(block.get(1), Element::Block(15));
751 751 assert_eq!(block.get(3), Element::None);
752 752 assert_eq!(block.get(2), Element::Rev(0));
753 753 assert_eq!(block.get(4), Element::Rev(1));
754 754 }
755 755
756 756 type TestIndex = HashMap<UncheckedRevision, Node>;
757 757
758 758 impl RevlogIndex for TestIndex {
759 759 fn node(&self, rev: Revision) -> Option<&Node> {
760 760 self.get(&rev.into())
761 761 }
762 762
763 763 fn len(&self) -> usize {
764 764 self.len()
765 765 }
766 766
767 767 fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
768 768 self.get(&rev).map(|_| Revision(rev.0))
769 769 }
770 770 }
771 771
772 772 /// Pad hexadecimal Node prefix with zeros on the right
773 773 ///
774 774 /// This avoids having to repeatedly write very long hexadecimal
775 775 /// strings for test data, and keeps them independent of the actual hash size.
776 776 #[cfg(test)]
777 777 fn pad_node(hex: &str) -> Node {
778 Node::from_hex(&hex_pad_right(hex)).unwrap()
778 Node::from_hex(hex_pad_right(hex)).unwrap()
779 779 }
780 780
781 781 /// Pad hexadecimal Node prefix with zeros on the right, then insert
782 782 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
783 783 idx.insert(rev.into(), pad_node(hex));
784 784 }
785 785
786 786 fn sample_nodetree() -> NodeTree {
787 787 NodeTree::from(vec![
788 788 block![0: Rev(9)],
789 789 block![0: Rev(0), 1: Rev(9)],
790 790 block![0: Block(1), 1:Rev(1)],
791 791 ])
792 792 }
793 793
794 794 fn hex(s: &str) -> NodePrefix {
795 795 NodePrefix::from_hex(s).unwrap()
796 796 }
797 797
798 798 #[test]
799 799 fn test_nt_debug() {
800 800 let nt = sample_nodetree();
801 801 assert_eq!(
802 802 format!("{:?}", nt),
803 803 "readonly: \
804 804 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
805 805 growable: [], \
806 806 root: {0: Block(1), 1: Rev(1)}",
807 807 );
808 808 }
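
// Sketch (not part of the original changeset): how logical block indices
// are laid out, readonly blocks first, then growable ones, with the mutable
// root always at the last index, as implemented by `Index<usize>`.
#[test]
fn example_block_index_layout() {
    let nt = sample_nodetree();
    assert_eq!(nt.len(), nt.readonly.len() + nt.growable.len() + 1);
    // The last logical index always resolves to the mutable root.
    assert_eq!(nt[nt.len() - 1], nt.root);
    // Lower indices resolve to the readonly blocks.
    assert_eq!(nt[0], nt.readonly[0]);
}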
809 809
810 810 #[test]
811 811 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
812 812 let mut idx: TestIndex = HashMap::new();
813 813 pad_insert(&mut idx, R!(1), "1234deadcafe");
814 814
815 815 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
816 816 assert_eq!(nt.find_bin(&idx, hex("1"))?, Some(R!(1)));
817 817 assert_eq!(nt.find_bin(&idx, hex("12"))?, Some(R!(1)));
818 818 assert_eq!(nt.find_bin(&idx, hex("1234de"))?, Some(R!(1)));
819 819 assert_eq!(nt.find_bin(&idx, hex("1a"))?, None);
820 820 assert_eq!(nt.find_bin(&idx, hex("ab"))?, None);
821 821
822 822 // and with full binary Nodes
823 823 assert_eq!(
824 824 nt.find_node(&idx, idx.get(&1.into()).unwrap())?,
825 825 Some(R!(1))
826 826 );
827 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
827 let unknown = Node::from_hex(hex_pad_right("3d")).unwrap();
828 828 assert_eq!(nt.find_node(&idx, &unknown)?, None);
829 829 Ok(())
830 830 }
831 831
832 832 #[test]
833 833 fn test_immutable_find_one_jump() {
834 834 let mut idx = TestIndex::new();
835 835 pad_insert(&mut idx, R!(9), "012");
836 836 pad_insert(&mut idx, R!(0), "00a");
837 837
838 838 let nt = sample_nodetree();
839 839
840 840 assert_eq!(nt.find_bin(&idx, hex("0")), Err(MultipleResults));
841 841 assert_eq!(nt.find_bin(&idx, hex("01")), Ok(Some(R!(9))));
842 842 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
843 843 assert_eq!(nt.find_bin(&idx, hex("00a")), Ok(Some(R!(0))));
844 844 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("00a")), Ok(Some(3)));
845 845 assert_eq!(nt.find_bin(&idx, hex("000")), Ok(Some(NULL_REVISION)));
846 846 }
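
// Sketch (not part of the original changeset): what the private `visit()`
// iterator yields while resolving a prefix; the walk starts at the root
// block and follows `Block` edges until it reaches a leaf element.
#[test]
fn example_visitor_walk() {
    let nt = sample_nodetree();
    let steps: Vec<_> = nt.visit(hex("01")).collect();
    assert_eq!(steps.len(), 2);
    // First step: the root block points to block 1 for nybble 0.
    assert_eq!(steps[0].element, Element::Block(1));
    assert_eq!(steps[0].final_revision(), None);
    // Second step: block 1 holds a leaf for nybble 1, ending the walk.
    assert_eq!(steps[1].element, Element::Rev(9));
    assert_eq!(steps[1].final_revision(), Some(Some(9.into())));
}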
847 847
848 848 #[test]
849 849 fn test_mutated_find() -> Result<(), NodeMapError> {
850 850 let mut idx = TestIndex::new();
851 851 pad_insert(&mut idx, R!(9), "012");
852 852 pad_insert(&mut idx, R!(0), "00a");
853 853 pad_insert(&mut idx, R!(2), "cafe");
854 854 pad_insert(&mut idx, R!(3), "15");
855 855 pad_insert(&mut idx, R!(1), "10");
856 856
857 857 let nt = NodeTree {
858 858 readonly: sample_nodetree().readonly,
859 859 growable: vec![block![0: Rev(1), 5: Rev(3)]],
860 860 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
861 861 masked_inner_blocks: 1,
862 862 };
863 863 assert_eq!(nt.find_bin(&idx, hex("10"))?, Some(R!(1)));
864 864 assert_eq!(nt.find_bin(&idx, hex("c"))?, Some(R!(2)));
865 865 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("c"))?, Some(1));
866 866 assert_eq!(nt.find_bin(&idx, hex("00")), Err(MultipleResults));
867 867 assert_eq!(nt.find_bin(&idx, hex("000"))?, Some(NULL_REVISION));
868 868 assert_eq!(nt.unique_prefix_len_bin(&idx, hex("000"))?, Some(3));
869 869 assert_eq!(nt.find_bin(&idx, hex("01"))?, Some(R!(9)));
870 870 assert_eq!(nt.masked_readonly_blocks(), 2);
871 871 Ok(())
872 872 }
873 873
874 874 pub struct TestNtIndex {
875 875 pub index: TestIndex,
876 876 pub nt: NodeTree,
877 877 }
878 878
879 879 impl TestNtIndex {
880 880 pub fn new() -> Self {
881 881 TestNtIndex {
882 882 index: HashMap::new(),
883 883 nt: NodeTree::default(),
884 884 }
885 885 }
886 886
887 887 pub fn insert_node(
888 888 &mut self,
889 889 rev: Revision,
890 890 node: Node,
891 891 ) -> Result<(), NodeMapError> {
892 892 self.index.insert(rev.into(), node);
893 893 self.nt.insert(&self.index, &node, rev)?;
894 894 Ok(())
895 895 }
896 896
897 897 pub fn insert(
898 898 &mut self,
899 899 rev: Revision,
900 900 hex: &str,
901 901 ) -> Result<(), NodeMapError> {
902 902 let node = pad_node(hex);
903 return self.insert_node(rev, node);
903 self.insert_node(rev, node)
904 904 }
905 905
906 906 fn find_hex(
907 907 &self,
908 908 prefix: &str,
909 909 ) -> Result<Option<Revision>, NodeMapError> {
910 910 self.nt.find_bin(&self.index, hex(prefix))
911 911 }
912 912
913 913 fn unique_prefix_len_hex(
914 914 &self,
915 915 prefix: &str,
916 916 ) -> Result<Option<usize>, NodeMapError> {
917 917 self.nt.unique_prefix_len_bin(&self.index, hex(prefix))
918 918 }
919 919
920 920 /// Drain `added` and restart a new one
921 921 fn commit(self) -> Self {
922 922 let mut as_vec: Vec<Block> =
923 923 self.nt.readonly.iter().copied().collect();
924 924 as_vec.extend(self.nt.growable);
925 925 as_vec.push(self.nt.root);
926 926
927 927 Self {
928 928 index: self.index,
929 929 nt: NodeTree::from(as_vec),
930 930 }
931 931 }
932 932 }
933 933
934 impl Default for TestNtIndex {
935 fn default() -> Self {
936 Self::new()
937 }
938 }
939
934 940 #[test]
935 941 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
936 942 let mut idx = TestNtIndex::new();
937 943 idx.insert(Revision(0), "1234")?;
938 944 assert_eq!(idx.find_hex("1")?, Some(R!(0)));
939 945 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
940 946
941 947 // let's trigger a simple split
942 948 idx.insert(Revision(1), "1a34")?;
943 949 assert_eq!(idx.nt.growable.len(), 1);
944 950 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
945 951 assert_eq!(idx.find_hex("1a")?, Some(R!(1)));
946 952
947 953 // reinserting is a no-op
948 954 idx.insert(Revision(1), "1a34")?;
949 955 assert_eq!(idx.nt.growable.len(), 1);
950 956 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
951 957 assert_eq!(idx.find_hex("1a")?, Some(R!(1)));
952 958
953 959 idx.insert(Revision(2), "1a01")?;
954 960 assert_eq!(idx.nt.growable.len(), 2);
955 961 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
956 962 assert_eq!(idx.find_hex("12")?, Some(R!(0)));
957 963 assert_eq!(idx.find_hex("1a3")?, Some(R!(1)));
958 964 assert_eq!(idx.find_hex("1a0")?, Some(R!(2)));
959 965 assert_eq!(idx.find_hex("1a12")?, None);
960 966
961 967 // now let's make it split and create more than one additional block
962 968 idx.insert(Revision(3), "1a345")?;
963 969 assert_eq!(idx.nt.growable.len(), 4);
964 970 assert_eq!(idx.find_hex("1a340")?, Some(R!(1)));
965 971 assert_eq!(idx.find_hex("1a345")?, Some(R!(3)));
966 972 assert_eq!(idx.find_hex("1a341")?, None);
967 973
968 974 // there's no readonly block to mask
969 975 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
970 976 Ok(())
971 977 }
972 978
973 979 #[test]
974 980 fn test_unique_prefix_len_zero_prefix() {
975 981 let mut idx = TestNtIndex::new();
976 982 idx.insert(Revision(0), "00000abcd").unwrap();
977 983
978 984 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
979 985 // in the nodetree proper, this will be found at the first nybble
980 986 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
981 987 // but the first difference with `NULL_NODE`
982 988 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
983 989 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
984 990
985 991 // same with odd result
986 992 idx.insert(Revision(1), "00123").unwrap();
987 993 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
988 994 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
989 995
990 996 // these are unchanged of course
991 997 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
992 998 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
993 999 }
994 1000
995 1001 #[test]
996 1002 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
997 1003 // check that the splitting loop is long enough
998 1004 let mut nt_idx = TestNtIndex::new();
999 1005 let nt = &mut nt_idx.nt;
1000 1006 let idx = &mut nt_idx.index;
1001 1007
1002 1008 let node0_hex = hex_pad_right("444444");
1003 1009 let mut node1_hex = hex_pad_right("444444");
1004 1010 node1_hex.pop();
1005 1011 node1_hex.push('5');
1006 let node0 = Node::from_hex(&node0_hex).unwrap();
1012 let node0 = Node::from_hex(node0_hex).unwrap();
1007 1013 let node1 = Node::from_hex(&node1_hex).unwrap();
1008 1014
1009 1015 idx.insert(0.into(), node0);
1010 1016 nt.insert(idx, &node0, R!(0))?;
1011 1017 idx.insert(1.into(), node1);
1012 1018 nt.insert(idx, &node1, R!(1))?;
1013 1019
1014 1020 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(R!(0)));
1015 1021 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(R!(1)));
1016 1022 Ok(())
1017 1023 }
1018 1024
1019 1025 #[test]
1020 1026 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1021 1027 let mut idx = TestNtIndex::new();
1022 1028 idx.insert(Revision(0), "1234")?;
1023 1029 idx.insert(Revision(1), "1235")?;
1024 1030 idx.insert(Revision(2), "131")?;
1025 1031 idx.insert(Revision(3), "cafe")?;
1026 1032 let mut idx = idx.commit();
1027 1033 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1028 1034 assert_eq!(idx.find_hex("1235")?, Some(R!(1)));
1029 1035 assert_eq!(idx.find_hex("131")?, Some(R!(2)));
1030 1036 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1031 1037 // we did not add anything since init from readonly
1032 1038 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1033 1039
1034 1040 idx.insert(Revision(4), "123A")?;
1035 1041 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1036 1042 assert_eq!(idx.find_hex("1235")?, Some(R!(1)));
1037 1043 assert_eq!(idx.find_hex("131")?, Some(R!(2)));
1038 1044 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1039 1045 assert_eq!(idx.find_hex("123A")?, Some(R!(4)));
1040 1046 // we masked blocks for all prefixes of "123", including the root
1041 1047 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1042 1048
1043 1049 eprintln!("{:?}", idx.nt);
1044 1050 idx.insert(Revision(5), "c0")?;
1045 1051 assert_eq!(idx.find_hex("cafe")?, Some(R!(3)));
1046 1052 assert_eq!(idx.find_hex("c0")?, Some(R!(5)));
1047 1053 assert_eq!(idx.find_hex("c1")?, None);
1048 1054 assert_eq!(idx.find_hex("1234")?, Some(R!(0)));
1049 1055 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1050 1056 // it doesn't mask anything
1051 1057 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1052 1058
1053 1059 Ok(())
1054 1060 }
1055 1061
1056 1062 #[test]
1057 1063 fn test_invalidate_all() -> Result<(), NodeMapError> {
1058 1064 let mut idx = TestNtIndex::new();
1059 1065 idx.insert(Revision(0), "1234")?;
1060 1066 idx.insert(Revision(1), "1235")?;
1061 1067 idx.insert(Revision(2), "131")?;
1062 1068 idx.insert(Revision(3), "cafe")?;
1063 1069 let mut idx = idx.commit();
1064 1070
1065 1071 idx.nt.invalidate_all();
1066 1072
1067 1073 assert_eq!(idx.find_hex("1234")?, None);
1068 1074 assert_eq!(idx.find_hex("1235")?, None);
1069 1075 assert_eq!(idx.find_hex("131")?, None);
1070 1076 assert_eq!(idx.find_hex("cafe")?, None);
1071 1077 // all the readonly blocks have been masked, this is the
1072 1078 // conventional expected response
1073 1079 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1074 1080 Ok(())
1075 1081 }
1076 1082
1077 1083 #[test]
1078 1084 fn test_into_added_empty() {
1079 1085 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1080 1086 assert!(sample_nodetree()
1081 1087 .into_readonly_and_added_bytes()
1082 1088 .1
1083 1089 .is_empty());
1084 1090 }
1085 1091
1086 1092 #[test]
1087 1093 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1088 1094 let mut idx = TestNtIndex::new();
1089 1095 idx.insert(Revision(0), "1234")?;
1090 1096 let mut idx = idx.commit();
1091 1097 idx.insert(Revision(4), "cafe")?;
1092 1098 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1093 1099
1094 1100 // only the root block has been changed
1095 1101 assert_eq!(bytes.len(), size_of::<Block>());
1096 1102 // big endian for -2
1097 1103 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1098 1104 // big endian for -6
1099 1105 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1100 1106 Ok(())
1101 1107 }
1102 1108 }
@@ -1,108 +1,108 b''
1 1 use crate::errors::{HgError, HgResultExt};
2 2 use bytes_cast::{unaligned, BytesCast};
3 3 use memmap2::Mmap;
4 4 use std::path::{Path, PathBuf};
5 5
6 6 use crate::vfs::Vfs;
7 7
8 8 const ONDISK_VERSION: u8 = 1;
9 9
10 10 pub(super) struct NodeMapDocket {
11 11 pub data_length: usize,
12 12 // TODO: keep here more of the data from `parse()` when we need it
13 13 }
14 14
15 15 #[derive(BytesCast)]
16 16 #[repr(C)]
17 17 struct DocketHeader {
18 18 uid_size: u8,
19 19 _tip_rev: unaligned::U64Be,
20 20 data_length: unaligned::U64Be,
21 21 _data_unused: unaligned::U64Be,
22 22 tip_node_size: unaligned::U64Be,
23 23 }
24 24
25 25 impl NodeMapDocket {
26 26 /// Return `Ok(None)` when the caller should proceed without a persistent
27 27 /// nodemap:
28 28 ///
29 29 /// * This revlog does not have a `.n` docket file (it is not generated for
30 30 /// small revlogs), or
31 31 /// * The docket has an unsupported version number (repositories created by
32 32 /// later hg, maybe that should be a requirement instead?), or
33 33 /// * The docket file points to a missing (likely deleted) data file (this
34 34 /// can happen in a rare race condition).
35 35 pub fn read_from_file(
36 36 store_vfs: &Vfs,
37 37 index_path: &Path,
38 38 ) -> Result<Option<(Self, Mmap)>, HgError> {
39 39 let docket_path = index_path.with_extension("n");
40 40 let docket_bytes = if let Some(bytes) =
41 41 store_vfs.read(&docket_path).io_not_found_as_none()?
42 42 {
43 43 bytes
44 44 } else {
45 45 return Ok(None);
46 46 };
47 47
48 48 let input = if let Some((&ONDISK_VERSION, rest)) =
49 49 docket_bytes.split_first()
50 50 {
51 51 rest
52 52 } else {
53 53 return Ok(None);
54 54 };
55 55
56 56 /// Treat any error as a parse error
57 57 fn parse<T, E>(result: Result<T, E>) -> Result<T, HgError> {
58 58 result
59 59 .map_err(|_| HgError::corrupted("nodemap docket parse error"))
60 60 }
61 61
62 62 let (header, rest) = parse(DocketHeader::from_bytes(input))?;
63 63 let uid_size = header.uid_size as usize;
64 64 // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
65 65 // systems?
66 66 let tip_node_size = header.tip_node_size.get() as usize;
67 67 let data_length = header.data_length.get() as usize;
68 68 let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
69 69 let (_tip_node, _rest) =
70 70 parse(u8::slice_from_bytes(rest, tip_node_size))?;
71 71 let uid = parse(std::str::from_utf8(uid))?;
72 72 let docket = NodeMapDocket { data_length };
73 73
74 74 let data_path = rawdata_path(&docket_path, uid);
75 75 // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
76 76 // config is false?
77 77 if let Some(mmap) =
78 store_vfs.mmap_open(&data_path).io_not_found_as_none()?
78 store_vfs.mmap_open(data_path).io_not_found_as_none()?
79 79 {
80 80 if mmap.len() >= data_length {
81 81 Ok(Some((docket, mmap)))
82 82 } else {
83 83 Err(HgError::corrupted("persistent nodemap too short"))
84 84 }
85 85 } else {
86 86 // Even if .hg/requires opted in, some revlogs are deemed small
87 87 // enough to not need a persistent nodemap.
88 88 Ok(None)
89 89 }
90 90 }
91 91 }
92 92
93 93 fn rawdata_path(docket_path: &Path, uid: &str) -> PathBuf {
94 94 let docket_name = docket_path
95 95 .file_name()
96 96 .expect("expected a base name")
97 97 .to_str()
98 98 .expect("expected an ASCII file name in the store");
99 99 let prefix = docket_name
100 100 .strip_suffix(".n.a")
101 101 .or_else(|| docket_name.strip_suffix(".n"))
102 102 .expect("expected docket path in .n or .n.a");
103 103 let name = format!("{}-{}.nd", prefix, uid);
104 104 docket_path
105 105 .parent()
106 106 .expect("expected a non-root path")
107 107 .join(name)
108 108 }
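
// Illustrative sketch (not part of the original changeset): how a docket
// path and uid combine into the raw data file name. The concrete paths
// below are made up for the example.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn example_rawdata_path() {
        let docket = PathBuf::from("store/00changelog.n");
        assert_eq!(
            rawdata_path(&docket, "1234abcd"),
            PathBuf::from("store/00changelog-1234abcd.nd")
        );
        // The ".n.a" docket variant resolves to the same data file name.
        let pending = PathBuf::from("store/00changelog.n.a");
        assert_eq!(
            rawdata_path(&pending, "1234abcd"),
            PathBuf::from("store/00changelog-1234abcd.nd")
        );
    }
}
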
@@ -1,436 +1,436 b''
1 1 // files.rs
2 2 //
3 3 // Copyright 2019
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 // Yuya Nishihara <yuya@tcha.org>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 //! Functions for fiddling with files.
11 11
12 12 use crate::utils::{
13 13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
14 14 path_auditor::PathAuditor,
15 15 replace_slice,
16 16 };
17 17 use lazy_static::lazy_static;
18 18 use same_file::is_same_file;
19 19 use std::borrow::{Cow, ToOwned};
20 20 use std::ffi::{OsStr, OsString};
21 21 use std::iter::FusedIterator;
22 22 use std::ops::Deref;
23 23 use std::path::{Path, PathBuf};
24 24
25 25 pub fn get_os_str_from_bytes(bytes: &[u8]) -> &OsStr {
26 26 let os_str;
27 27 #[cfg(unix)]
28 28 {
29 29 use std::os::unix::ffi::OsStrExt;
30 30 os_str = std::ffi::OsStr::from_bytes(bytes);
31 31 }
32 32 // TODO Handle other platforms
33 33 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
34 34 // Perhaps, the return type would have to be Result<PathBuf>.
35 35 os_str
36 36 }
37 37
38 38 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
39 39 Path::new(get_os_str_from_bytes(bytes))
40 40 }
41 41
42 42 // TODO: need to convert from WTF8 to MBCS bytes on Windows.
43 43 // that's why Vec<u8> is returned.
44 44 #[cfg(unix)]
45 45 pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
46 46 get_bytes_from_os_str(path.as_ref())
47 47 }
48 48
49 49 #[cfg(unix)]
50 50 pub fn get_bytes_from_os_str(str: impl AsRef<OsStr>) -> Vec<u8> {
51 51 use std::os::unix::ffi::OsStrExt;
52 52 str.as_ref().as_bytes().to_vec()
53 53 }
54 54
55 55 #[cfg(unix)]
56 56 pub fn get_bytes_from_os_string(str: OsString) -> Vec<u8> {
57 57 use std::os::unix::ffi::OsStringExt;
58 58 str.into_vec()
59 59 }
60 60
62 62 /// An iterator over a repository path, yielding the path itself and its ancestors.
62 62 #[derive(Copy, Clone, Debug)]
63 63 pub struct Ancestors<'a> {
64 64 next: Option<&'a HgPath>,
65 65 }
66 66
67 67 impl<'a> Iterator for Ancestors<'a> {
68 68 type Item = &'a HgPath;
69 69
70 70 fn next(&mut self) -> Option<Self::Item> {
71 71 let next = self.next;
72 72 self.next = match self.next {
73 73 Some(s) if s.is_empty() => None,
74 74 Some(s) => {
75 75 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
76 76 Some(HgPath::new(&s.as_bytes()[..p]))
77 77 }
78 78 None => None,
79 79 };
80 80 next
81 81 }
82 82 }
83 83
84 84 impl<'a> FusedIterator for Ancestors<'a> {}
85 85
86 86 /// An iterator over a repository path, yielding the path itself and its ancestors.
87 87 #[derive(Copy, Clone, Debug)]
88 88 pub(crate) struct AncestorsWithBase<'a> {
89 89 next: Option<(&'a HgPath, &'a HgPath)>,
90 90 }
91 91
92 92 impl<'a> Iterator for AncestorsWithBase<'a> {
93 93 type Item = (&'a HgPath, &'a HgPath);
94 94
95 95 fn next(&mut self) -> Option<Self::Item> {
96 96 let next = self.next;
97 97 self.next = match self.next {
98 98 Some((s, _)) if s.is_empty() => None,
99 99 Some((s, _)) => Some(s.split_filename()),
100 100 None => None,
101 101 };
102 102 next
103 103 }
104 104 }
105 105
106 106 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
107 107
108 108 /// Returns an iterator yielding ancestor directories of the given repository
109 109 /// path.
110 110 ///
111 111 /// The path is separated by '/', and must not start with '/'.
112 112 ///
113 113 /// The path itself isn't included unless it is b"" (meaning the root
114 114 /// directory).
115 115 pub fn find_dirs(path: &HgPath) -> Ancestors {
116 116 let mut dirs = Ancestors { next: Some(path) };
117 117 if !path.is_empty() {
118 118 dirs.next(); // skip itself
119 119 }
120 120 dirs
121 121 }
122 122
123 123 /// Returns an iterator yielding ancestor directories of the given repository
124 124 /// path.
125 125 ///
126 126 /// The path is separated by '/', and must not start with '/'.
127 127 ///
128 128 /// The path itself isn't included unless it is b"" (meaning the root
129 129 /// directory.)
130 130 pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase {
131 131 let mut dirs = AncestorsWithBase {
132 132 next: Some((path, HgPath::new(b""))),
133 133 };
134 134 if !path.is_empty() {
135 135 dirs.next(); // skip itself
136 136 }
137 137 dirs
138 138 }
139 139
140 140 /// TODO more than ASCII?
141 141 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
142 142 #[cfg(windows)] // NTFS compares via upper()
143 143 return path.to_ascii_uppercase();
144 144 #[cfg(unix)]
145 145 path.to_ascii_lowercase()
146 146 }
147 147
148 148 lazy_static! {
149 149 static ref IGNORED_CHARS: Vec<Vec<u8>> = {
150 150 [
151 151 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
152 152 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
153 153 ]
154 154 .iter()
155 155 .map(|code| {
156 156 std::char::from_u32(*code)
157 157 .unwrap()
158 158 .encode_utf8(&mut [0; 3])
159 159 .bytes()
160 160 .collect()
161 161 })
162 162 .collect()
163 163 };
164 164 }
165 165
166 166 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
167 167 let mut buf = bytes.to_owned();
168 168 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
169 169 if needs_escaping {
170 170 for forbidden in IGNORED_CHARS.iter() {
171 171 replace_slice(&mut buf, forbidden, &[])
172 172 }
173 173 buf
174 174 } else {
175 175 buf
176 176 }
177 177 }
178 178
179 179 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
180 180 hfs_ignore_clean(&bytes.to_ascii_lowercase())
181 181 }
182 182
183 183 /// Returns the canonical path of `name`, given `cwd` and `root`
184 184 pub fn canonical_path(
185 185 root: impl AsRef<Path>,
186 186 cwd: impl AsRef<Path>,
187 187 name: impl AsRef<Path>,
188 188 ) -> Result<PathBuf, HgPathError> {
189 189 // TODO add missing normalization for other platforms
190 190 let root = root.as_ref();
191 191 let cwd = cwd.as_ref();
192 192 let name = name.as_ref();
193 193
194 194 let name = if !name.is_absolute() {
195 root.join(&cwd).join(&name)
195 root.join(cwd).join(name)
196 196 } else {
197 197 name.to_owned()
198 198 };
199 let auditor = PathAuditor::new(&root);
200 if name != root && name.starts_with(&root) {
201 let name = name.strip_prefix(&root).unwrap();
199 let auditor = PathAuditor::new(root);
200 if name != root && name.starts_with(root) {
201 let name = name.strip_prefix(root).unwrap();
202 202 auditor.audit_path(path_to_hg_path_buf(name)?)?;
203 203 Ok(name.to_owned())
204 204 } else if name == root {
205 205 Ok("".into())
206 206 } else {
207 207 // Determine whether `name' is in the hierarchy at or beneath `root',
208 208 // by iterating name=name.parent() until it returns `None` (can't
209 209 // check name == '/', because that doesn't work on windows).
210 210 let mut name = name.deref();
211 211 let original_name = name.to_owned();
212 212 loop {
213 let same = is_same_file(&name, &root).unwrap_or(false);
213 let same = is_same_file(name, root).unwrap_or(false);
214 214 if same {
215 215 if name == original_name {
216 216 // `name` was actually the same as root (maybe a symlink)
217 217 return Ok("".into());
218 218 }
219 219 // `name` is a symlink to root, so `original_name` is under
220 220 // root
221 let rel_path = original_name.strip_prefix(&name).unwrap();
222 auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
221 let rel_path = original_name.strip_prefix(name).unwrap();
222 auditor.audit_path(path_to_hg_path_buf(rel_path)?)?;
223 223 return Ok(rel_path.to_owned());
224 224 }
225 225 name = match name.parent() {
226 226 None => break,
227 227 Some(p) => p,
228 228 };
229 229 }
230 230 // TODO hint to the user about using --cwd
231 231 // Bubble up the responsibility to Python for now
232 232 Err(HgPathError::NotUnderRoot {
233 233 path: original_name,
234 234 root: root.to_owned(),
235 235 })
236 236 }
237 237 }
238 238
239 239 /// Returns the representation of the path relative to the current working
240 240 /// directory for display purposes.
241 241 ///
242 242 /// `cwd` is a `HgPath`, so it is considered relative to the root directory
243 243 /// of the repository.
244 244 ///
245 245 /// # Examples
246 246 ///
247 247 /// ```
248 248 /// use hg::utils::hg_path::HgPath;
249 249 /// use hg::utils::files::relativize_path;
250 250 /// use std::borrow::Cow;
251 251 ///
252 252 /// let file = HgPath::new(b"nested/file");
253 253 /// let cwd = HgPath::new(b"");
254 254 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"nested/file"));
255 255 ///
256 256 /// let cwd = HgPath::new(b"nested");
257 257 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"file"));
258 258 ///
259 259 /// let cwd = HgPath::new(b"other");
260 260 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"../nested/file"));
261 261 /// ```
262 262 pub fn relativize_path(path: &HgPath, cwd: impl AsRef<HgPath>) -> Cow<[u8]> {
263 263 if cwd.as_ref().is_empty() {
264 264 Cow::Borrowed(path.as_bytes())
265 265 } else {
266 266 // This is not all accurate as to how large `res` will actually be, but
267 267 // profiling `rhg files` on a large-ish repo shows it’s better than
268 268 // starting from a zero-capacity `Vec` and letting `extend` reallocate
269 269 // repeatedly.
270 270 let guesstimate = path.as_bytes().len();
271 271
272 272 let mut res: Vec<u8> = Vec::with_capacity(guesstimate);
273 273 let mut path_iter = path.as_bytes().split(|b| *b == b'/').peekable();
274 274 let mut cwd_iter =
275 275 cwd.as_ref().as_bytes().split(|b| *b == b'/').peekable();
276 276 loop {
277 277 match (path_iter.peek(), cwd_iter.peek()) {
278 278 (Some(a), Some(b)) if a == b => (),
279 279 _ => break,
280 280 }
281 281 path_iter.next();
282 282 cwd_iter.next();
283 283 }
284 284 let mut need_sep = false;
285 285 for _ in cwd_iter {
286 286 if need_sep {
287 287 res.extend(b"/")
288 288 } else {
289 289 need_sep = true
290 290 };
291 291 res.extend(b"..");
292 292 }
293 293 for c in path_iter {
294 294 if need_sep {
295 295 res.extend(b"/")
296 296 } else {
297 297 need_sep = true
298 298 };
299 299 res.extend(c);
300 300 }
301 301 Cow::Owned(res)
302 302 }
303 303 }
304 304
305 305 #[cfg(test)]
306 306 mod tests {
307 307 use super::*;
308 308 use pretty_assertions::assert_eq;
309 309
310 310 #[test]
311 311 fn find_dirs_some() {
312 312 let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
313 313 assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
314 314 assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
315 315 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
316 316 assert_eq!(dirs.next(), None);
317 317 assert_eq!(dirs.next(), None);
318 318 }
319 319
320 320 #[test]
321 321 fn find_dirs_empty() {
322 322 // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
323 323 let mut dirs = super::find_dirs(HgPath::new(b""));
324 324 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
325 325 assert_eq!(dirs.next(), None);
326 326 assert_eq!(dirs.next(), None);
327 327 }
328 328
329 329 #[test]
330 330 fn test_find_dirs_with_base_some() {
331 331 let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
332 332 assert_eq!(
333 333 dirs.next(),
334 334 Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
335 335 );
336 336 assert_eq!(
337 337 dirs.next(),
338 338 Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
339 339 );
340 340 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
341 341 assert_eq!(dirs.next(), None);
342 342 assert_eq!(dirs.next(), None);
343 343 }
344 344
345 345 #[test]
346 346 fn test_find_dirs_with_base_empty() {
347 347 let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
348 348 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
349 349 assert_eq!(dirs.next(), None);
350 350 assert_eq!(dirs.next(), None);
351 351 }
352 352
353 353 #[test]
354 354 fn test_canonical_path() {
355 355 let root = Path::new("/repo");
356 356 let cwd = Path::new("/dir");
357 357 let name = Path::new("filename");
358 358 assert_eq!(
359 359 canonical_path(root, cwd, name),
360 360 Err(HgPathError::NotUnderRoot {
361 361 path: PathBuf::from("/dir/filename"),
362 362 root: root.to_path_buf()
363 363 })
364 364 );
365 365
366 366 let root = Path::new("/repo");
367 367 let cwd = Path::new("/");
368 368 let name = Path::new("filename");
369 369 assert_eq!(
370 370 canonical_path(root, cwd, name),
371 371 Err(HgPathError::NotUnderRoot {
372 372 path: PathBuf::from("/filename"),
373 373 root: root.to_path_buf()
374 374 })
375 375 );
376 376
377 377 let root = Path::new("/repo");
378 378 let cwd = Path::new("/");
379 379 let name = Path::new("repo/filename");
380 380 assert_eq!(
381 381 canonical_path(root, cwd, name),
382 382 Ok(PathBuf::from("filename"))
383 383 );
384 384
385 385 let root = Path::new("/repo");
386 386 let cwd = Path::new("/repo");
387 387 let name = Path::new("filename");
388 388 assert_eq!(
389 389 canonical_path(root, cwd, name),
390 390 Ok(PathBuf::from("filename"))
391 391 );
392 392
393 393 let root = Path::new("/repo");
394 394 let cwd = Path::new("/repo/subdir");
395 395 let name = Path::new("filename");
396 396 assert_eq!(
397 397 canonical_path(root, cwd, name),
398 398 Ok(PathBuf::from("subdir/filename"))
399 399 );
400 400 }
401 401
402 402 #[test]
403 403 fn test_canonical_path_not_rooted() {
404 404 use std::fs::create_dir;
405 405 use tempfile::tempdir;
406 406
407 407 let base_dir = tempdir().unwrap();
408 408 let base_dir_path = base_dir.path();
409 409 let beneath_repo = base_dir_path.join("a");
410 410 let root = base_dir_path.join("a/b");
411 411 let out_of_repo = base_dir_path.join("c");
412 412 let under_repo_symlink = out_of_repo.join("d");
413 413
414 414 create_dir(&beneath_repo).unwrap();
415 415 create_dir(&root).unwrap();
416 416
417 417 // TODO make portable
418 418 std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();
419 419
420 420 assert_eq!(
421 421 canonical_path(&root, Path::new(""), out_of_repo),
422 422 Ok(PathBuf::from(""))
423 423 );
424 424 assert_eq!(
425 425 canonical_path(&root, Path::new(""), &beneath_repo),
426 426 Err(HgPathError::NotUnderRoot {
427 427 path: beneath_repo,
428 428 root: root.to_owned()
429 429 })
430 430 );
431 431 assert_eq!(
432 canonical_path(&root, Path::new(""), &under_repo_symlink),
432 canonical_path(&root, Path::new(""), under_repo_symlink),
433 433 Ok(PathBuf::from("d"))
434 434 );
435 435 }
436 436 }
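The files.rs hunks above are all of the kind clippy's `needless_borrow` lint reports: an extra `&` on an argument where the callee already takes `impl AsRef<Path>` and the value is a reference to begin with. A minimal standalone sketch of the before/after (a hypothetical example, not part of the Mercurial sources):

use std::path::{Path, PathBuf};

// `Path::join` accepts `impl AsRef<Path>`, and `&Path` already satisfies that
// bound, so the extra borrows in `root.join(&cwd).join(&name)` are redundant.
fn absolute_name(root: &Path, cwd: &Path, name: &Path) -> PathBuf {
    root.join(cwd).join(name)
}

fn main() {
    let p = absolute_name(Path::new("repo"), Path::new("dir"), Path::new("file"));
    assert_eq!(p, PathBuf::from("repo/dir/file"));
}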
@@ -1,223 +1,223 b''
1 1 // path_auditor.rs
2 2 //
3 3 // Copyright 2020
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 //
6 6 // This software may be used and distributed according to the terms of the
7 7 // GNU General Public License version 2 or any later version.
8 8
9 9 use crate::utils::{
10 10 files::lower_clean,
11 11 find_slice_in_slice,
12 12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
13 13 };
14 14 use std::collections::HashSet;
15 15 use std::path::{Path, PathBuf};
16 16 use std::sync::{Mutex, RwLock};
17 17
18 18 /// Ensures that a path is valid for use in the repository i.e. does not use
19 19 /// any banned components, does not traverse a symlink, etc.
20 20 #[derive(Debug, Default)]
21 21 pub struct PathAuditor {
22 22 audited: Mutex<HashSet<HgPathBuf>>,
23 23 audited_dirs: RwLock<HashSet<HgPathBuf>>,
24 24 root: PathBuf,
25 25 }
26 26
27 27 impl PathAuditor {
28 28 pub fn new(root: impl AsRef<Path>) -> Self {
29 29 Self {
30 30 root: root.as_ref().to_owned(),
31 31 ..Default::default()
32 32 }
33 33 }
34 34 pub fn audit_path(
35 35 &self,
36 36 path: impl AsRef<HgPath>,
37 37 ) -> Result<(), HgPathError> {
38 38 // TODO windows "localpath" normalization
39 39 let path = path.as_ref();
40 40 if path.is_empty() {
41 41 return Ok(());
42 42 }
43 43 // TODO case normalization
44 44 if self.audited.lock().unwrap().contains(path) {
45 45 return Ok(());
46 46 }
47 47 // AIX ignores "/" at end of path, others raise EISDIR.
48 48 let last_byte = path.as_bytes()[path.len() - 1];
49 49 if last_byte == b'/' || last_byte == b'\\' {
50 50 return Err(HgPathError::EndsWithSlash(path.to_owned()));
51 51 }
52 52 let parts: Vec<_> = path
53 53 .as_bytes()
54 54 .split(|b| std::path::is_separator(*b as char))
55 55 .collect();
56 56
57 57 let first_component = lower_clean(parts[0]);
58 58 let first_component = first_component.as_slice();
59 59 if !path.split_drive().0.is_empty()
60 60 || (first_component == b".hg"
61 61 || first_component == b".hg."
62 62 || first_component == b"")
63 63 || parts.iter().any(|c| c == b"..")
64 64 {
65 65 return Err(HgPathError::InsideDotHg(path.to_owned()));
66 66 }
67 67
68 68 // Windows shortname aliases
69 69 for part in parts.iter() {
70 70 if part.contains(&b'~') {
71 71 let mut split = part.splitn(2, |b| *b == b'~');
72 72 let first =
73 73 split.next().unwrap().to_owned().to_ascii_uppercase();
74 74 let last = split.next().unwrap();
75 75 if last.iter().all(u8::is_ascii_digit)
76 76 && (first == b"HG" || first == b"HG8B6C")
77 77 {
78 78 return Err(HgPathError::ContainsIllegalComponent(
79 79 path.to_owned(),
80 80 ));
81 81 }
82 82 }
83 83 }
84 84 let lower_path = lower_clean(path.as_bytes());
85 85 if find_slice_in_slice(&lower_path, b".hg").is_some() {
86 86 let lower_parts: Vec<_> = path
87 87 .as_bytes()
88 88 .split(|b| std::path::is_separator(*b as char))
89 89 .collect();
90 90 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
91 91 if let Some(pos) = lower_parts[1..]
92 92 .iter()
93 93 .position(|part| part == &pattern.as_slice())
94 94 {
95 95 let base = lower_parts[..=pos]
96 96 .iter()
97 97 .fold(HgPathBuf::new(), |acc, p| {
98 98 acc.join(HgPath::new(p))
99 99 });
100 100 return Err(HgPathError::IsInsideNestedRepo {
101 101 path: path.to_owned(),
102 102 nested_repo: base,
103 103 });
104 104 }
105 105 }
106 106 }
107 107
108 108 let parts = &parts[..parts.len().saturating_sub(1)];
109 109
110 110 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
111 111 // if there's a "foo/.hg" directory. This also means we won't
112 112 // accidentally traverse a symlink into some other filesystem (which
113 113 // is potentially expensive to access).
114 114 for index in 0..parts.len() {
115 115 let prefix = &parts[..=index].join(&b'/');
116 116 let prefix = HgPath::new(prefix);
117 117 if self.audited_dirs.read().unwrap().contains(prefix) {
118 118 continue;
119 119 }
120 self.check_filesystem(&prefix, &path)?;
120 self.check_filesystem(prefix, path)?;
121 121 self.audited_dirs.write().unwrap().insert(prefix.to_owned());
122 122 }
123 123
124 124 self.audited.lock().unwrap().insert(path.to_owned());
125 125
126 126 Ok(())
127 127 }
128 128
129 129 pub fn check_filesystem(
130 130 &self,
131 131 prefix: impl AsRef<HgPath>,
132 132 path: impl AsRef<HgPath>,
133 133 ) -> Result<(), HgPathError> {
134 134 let prefix = prefix.as_ref();
135 135 let path = path.as_ref();
136 136 let current_path = self.root.join(
137 137 hg_path_to_path_buf(prefix)
138 138 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
139 139 );
140 140 match std::fs::symlink_metadata(&current_path) {
141 141 Err(e) => {
142 142 // EINVAL can be raised as invalid path syntax under win32.
143 143 if e.kind() != std::io::ErrorKind::NotFound
144 144 && e.kind() != std::io::ErrorKind::InvalidInput
145 145 && e.raw_os_error() != Some(20)
146 146 {
147 147 // Rust does not yet have an `ErrorKind` for
148 148 // `NotADirectory` (errno 20)
149 149 // It happens if the dirstate contains `foo/bar` and
150 150 // foo is not a directory
151 151 return Err(HgPathError::NotFsCompliant(path.to_owned()));
152 152 }
153 153 }
154 154 Ok(meta) => {
155 155 if meta.file_type().is_symlink() {
156 156 return Err(HgPathError::TraversesSymbolicLink {
157 157 path: path.to_owned(),
158 158 symlink: prefix.to_owned(),
159 159 });
160 160 }
161 161 if meta.file_type().is_dir()
162 162 && current_path.join(".hg").is_dir()
163 163 {
164 164 return Err(HgPathError::IsInsideNestedRepo {
165 165 path: path.to_owned(),
166 166 nested_repo: prefix.to_owned(),
167 167 });
168 168 }
169 169 }
170 170 };
171 171
172 172 Ok(())
173 173 }
174 174
175 175 pub fn check(&self, path: impl AsRef<HgPath>) -> bool {
176 176 self.audit_path(path).is_ok()
177 177 }
178 178 }
179 179
180 180 #[cfg(test)]
181 181 mod tests {
182 182 use super::*;
183 183 use std::fs::{create_dir, File};
184 184 use tempfile::tempdir;
185 185
186 186 #[test]
187 187 fn test_path_auditor() {
188 188 let base_dir = tempdir().unwrap();
189 189 let base_dir_path = base_dir.path();
190 190 let auditor = PathAuditor::new(base_dir_path);
191 191
192 192 let path = HgPath::new(b".hg/00changelog.i");
193 193 assert_eq!(
194 194 auditor.audit_path(path),
195 195 Err(HgPathError::InsideDotHg(path.to_owned()))
196 196 );
197 197 let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
198 198 assert_eq!(
199 199 auditor.audit_path(path),
200 200 Err(HgPathError::IsInsideNestedRepo {
201 201 path: path.to_owned(),
202 202 nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
203 203 })
204 204 );
205 205
206 create_dir(&base_dir_path.join("realdir")).unwrap();
207 File::create(&base_dir_path.join("realdir/realfile")).unwrap();
206 create_dir(base_dir_path.join("realdir")).unwrap();
207 File::create(base_dir_path.join("realdir/realfile")).unwrap();
208 208 // TODO make portable
209 209 std::os::unix::fs::symlink(
210 &base_dir_path.join("realdir"),
211 &base_dir_path.join("symlink"),
210 base_dir_path.join("realdir"),
211 base_dir_path.join("symlink"),
212 212 )
213 213 .unwrap();
214 214 let path = HgPath::new(b"symlink/realfile");
215 215 assert_eq!(
216 216 auditor.audit_path(path),
217 217 Err(HgPathError::TraversesSymbolicLink {
218 218 path: path.to_owned(),
219 219 symlink: HgPathBuf::from_bytes(b"symlink"),
220 220 })
221 221 );
222 222 }
223 223 }
@@ -1,307 +1,307 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::status` module provided by the
9 9 //! `hg-core` crate. From Python, this will be seen as
10 10 //! `rustext.dirstate.status`.
11 11
12 12 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
13 13 use cpython::{
14 14 exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
15 15 PyResult, PyTuple, Python, PythonObject, ToPyObject,
16 16 };
17 17 use hg::dirstate::status::StatusPath;
18 18 use hg::matchers::{
19 19 DifferenceMatcher, IntersectionMatcher, Matcher, NeverMatcher,
20 20 UnionMatcher,
21 21 };
22 22 use hg::{
23 23 matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
24 24 parse_pattern_syntax,
25 25 utils::{
26 26 files::{get_bytes_from_path, get_path_from_bytes},
27 27 hg_path::{HgPath, HgPathBuf},
28 28 },
29 29 BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError,
30 30 StatusOptions,
31 31 };
32 32 use std::borrow::Borrow;
33 33
34 34 fn collect_status_path_list(py: Python, paths: &[StatusPath<'_>]) -> PyList {
35 35 collect_pybytes_list(py, paths.iter().map(|item| &*item.path))
36 36 }
37 37
38 38 /// This will be useless once trait impls for collection are added to `PyBytes`
39 39 /// upstream.
40 40 fn collect_pybytes_list(
41 41 py: Python,
42 42 iter: impl Iterator<Item = impl AsRef<HgPath>>,
43 43 ) -> PyList {
44 44 let list = PyList::new(py, &[]);
45 45
46 46 for path in iter {
47 47 list.append(
48 48 py,
49 49 PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
50 50 )
51 51 }
52 52
53 53 list
54 54 }
55 55
56 56 fn collect_bad_matches(
57 57 py: Python,
58 58 collection: &[(impl AsRef<HgPath>, BadMatch)],
59 59 ) -> PyResult<PyList> {
60 60 let list = PyList::new(py, &[]);
61 61
62 62 let os = py.import("os")?;
63 63 let get_error_message = |code: i32| -> PyResult<_> {
64 64 os.call(
65 65 py,
66 66 "strerror",
67 67 PyTuple::new(py, &[code.to_py_object(py).into_object()]),
68 68 None,
69 69 )
70 70 };
71 71
72 72 for (path, bad_match) in collection.iter() {
73 73 let message = match bad_match {
74 74 BadMatch::OsError(code) => get_error_message(*code)?,
75 75 BadMatch::BadType(bad_type) => {
76 76 format!("unsupported file type (type is {})", bad_type)
77 77 .to_py_object(py)
78 78 .into_object()
79 79 }
80 80 };
81 81 list.append(
82 82 py,
83 83 (PyBytes::new(py, path.as_ref().as_bytes()), message)
84 84 .to_py_object(py)
85 85 .into_object(),
86 86 )
87 87 }
88 88
89 89 Ok(list)
90 90 }
91 91
92 92 fn handle_fallback(py: Python, err: StatusError) -> PyErr {
93 93 match err {
94 94 StatusError::Pattern(e) => {
95 95 let as_string = e.to_string();
96 96 log::trace!("Rust status fallback: `{}`", &as_string);
97 97
98 98 PyErr::new::<FallbackError, _>(py, &as_string)
99 99 }
100 100 e => PyErr::new::<ValueError, _>(py, e.to_string()),
101 101 }
102 102 }
103 103
104 104 pub fn status_wrapper(
105 105 py: Python,
106 106 dmap: DirstateMap,
107 107 matcher: PyObject,
108 108 root_dir: PyObject,
109 109 ignore_files: PyList,
110 110 check_exec: bool,
111 111 list_clean: bool,
112 112 list_ignored: bool,
113 113 list_unknown: bool,
114 114 collect_traversed_dirs: bool,
115 115 ) -> PyResult<PyTuple> {
116 116 let bytes = root_dir.extract::<PyBytes>(py)?;
117 117 let root_dir = get_path_from_bytes(bytes.data(py));
118 118
119 119 let dmap: DirstateMap = dmap.to_py_object(py);
120 120 let mut dmap = dmap.get_inner_mut(py);
121 121
122 122 let ignore_files: PyResult<Vec<_>> = ignore_files
123 123 .iter(py)
124 124 .map(|b| {
125 125 let file = b.extract::<PyBytes>(py)?;
126 126 Ok(get_path_from_bytes(file.data(py)).to_owned())
127 127 })
128 128 .collect();
129 129 let ignore_files = ignore_files?;
130 130 // The caller may call `copymap.items()` separately
131 131 let list_copies = false;
132 132
133 133 let after_status = |res: Result<(DirstateStatus<'_>, _), StatusError>| {
134 134 let (status_res, warnings) =
135 135 res.map_err(|e| handle_fallback(py, e))?;
136 136 build_response(py, status_res, warnings)
137 137 };
138 138
139 139 let matcher = extract_matcher(py, matcher)?;
140 140 dmap.with_status(
141 141 &*matcher,
142 142 root_dir.to_path_buf(),
143 143 ignore_files,
144 144 StatusOptions {
145 145 check_exec,
146 146 list_clean,
147 147 list_ignored,
148 148 list_unknown,
149 149 list_copies,
150 150 collect_traversed_dirs,
151 151 },
152 152 after_status,
153 153 )
154 154 }
155 155
156 156 /// Transform a Python matcher into a Rust matcher.
157 157 fn extract_matcher(
158 158 py: Python,
159 159 matcher: PyObject,
160 160 ) -> PyResult<Box<dyn Matcher + Sync>> {
161 161 match matcher.get_type(py).name(py).borrow() {
162 162 "alwaysmatcher" => Ok(Box::new(AlwaysMatcher)),
163 163 "nevermatcher" => Ok(Box::new(NeverMatcher)),
164 164 "exactmatcher" => {
165 165 let files = matcher.call_method(
166 166 py,
167 167 "files",
168 168 PyTuple::new(py, &[]),
169 169 None,
170 170 )?;
171 171 let files: PyList = files.cast_into(py)?;
172 172 let files: PyResult<Vec<HgPathBuf>> = files
173 173 .iter(py)
174 174 .map(|f| {
175 175 Ok(HgPathBuf::from_bytes(
176 176 f.extract::<PyBytes>(py)?.data(py),
177 177 ))
178 178 })
179 179 .collect();
180 180
181 181 let files = files?;
182 182 let file_matcher = FileMatcher::new(files)
183 183 .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
184 184 Ok(Box::new(file_matcher))
185 185 }
186 186 "includematcher" => {
187 187 // Get the patterns from Python even though most of them are
188 188 // redundant with those we will parse later on, as they include
189 189 // those passed from the command line.
190 190 let ignore_patterns: PyResult<Vec<_>> = matcher
191 191 .getattr(py, "_kindpats")?
192 192 .iter(py)?
193 193 .map(|k| {
194 194 let k = k?;
195 195 let syntax = parse_pattern_syntax(
196 196 &[
197 197 k.get_item(py, 0)?
198 198 .extract::<PyBytes>(py)?
199 199 .data(py),
200 200 &b":"[..],
201 201 ]
202 202 .concat(),
203 203 )
204 204 .map_err(|e| {
205 205 handle_fallback(py, StatusError::Pattern(e))
206 206 })?;
207 207 let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?;
208 208 let pattern = pattern.data(py);
209 209 let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?;
210 210 let source = get_path_from_bytes(source.data(py));
211 211 let new = IgnorePattern::new(syntax, pattern, source);
212 212 Ok(new)
213 213 })
214 214 .collect();
215 215
216 216 let ignore_patterns = ignore_patterns?;
217 217
218 218 let matcher = IncludeMatcher::new(ignore_patterns)
219 219 .map_err(|e| handle_fallback(py, e.into()))?;
220 220
221 221 Ok(Box::new(matcher))
222 222 }
223 223 "unionmatcher" => {
224 224 let matchers: PyResult<Vec<_>> = matcher
225 225 .getattr(py, "_matchers")?
226 226 .iter(py)?
227 227 .map(|py_matcher| extract_matcher(py, py_matcher?))
228 228 .collect();
229 229
230 230 Ok(Box::new(UnionMatcher::new(matchers?)))
231 231 }
232 232 "intersectionmatcher" => {
233 233 let m1 = extract_matcher(py, matcher.getattr(py, "_m1")?)?;
234 234 let m2 = extract_matcher(py, matcher.getattr(py, "_m2")?)?;
235 235
236 236 Ok(Box::new(IntersectionMatcher::new(m1, m2)))
237 237 }
238 238 "differencematcher" => {
239 239 let m1 = extract_matcher(py, matcher.getattr(py, "_m1")?)?;
240 240 let m2 = extract_matcher(py, matcher.getattr(py, "_m2")?)?;
241 241
242 242 Ok(Box::new(DifferenceMatcher::new(m1, m2)))
243 243 }
244 244 e => Err(PyErr::new::<FallbackError, _>(
245 245 py,
246 246 format!("Unsupported matcher {}", e),
247 247 )),
248 248 }
249 249 }
250 250
251 251 fn build_response(
252 252 py: Python,
253 253 status_res: DirstateStatus,
254 254 warnings: Vec<PatternFileWarning>,
255 255 ) -> PyResult<PyTuple> {
256 256 let modified = collect_status_path_list(py, &status_res.modified);
257 257 let added = collect_status_path_list(py, &status_res.added);
258 258 let removed = collect_status_path_list(py, &status_res.removed);
259 259 let deleted = collect_status_path_list(py, &status_res.deleted);
260 260 let clean = collect_status_path_list(py, &status_res.clean);
261 261 let ignored = collect_status_path_list(py, &status_res.ignored);
262 262 let unknown = collect_status_path_list(py, &status_res.unknown);
263 263 let unsure = collect_status_path_list(py, &status_res.unsure);
264 264 let bad = collect_bad_matches(py, &status_res.bad)?;
265 265 let traversed = collect_pybytes_list(py, status_res.traversed.iter());
266 266 let dirty = status_res.dirty.to_py_object(py);
267 267 let py_warnings = PyList::new(py, &[]);
268 268 for warning in warnings.iter() {
269 269 // We use duck-typing on the Python side for dispatch, good enough for
270 270 // now.
271 271 match warning {
272 272 PatternFileWarning::InvalidSyntax(file, syn) => {
273 273 py_warnings.append(
274 274 py,
275 275 (
276 PyBytes::new(py, &get_bytes_from_path(&file)),
276 PyBytes::new(py, &get_bytes_from_path(file)),
277 277 PyBytes::new(py, syn),
278 278 )
279 279 .to_py_object(py)
280 280 .into_object(),
281 281 );
282 282 }
283 283 PatternFileWarning::NoSuchFile(file) => py_warnings.append(
284 284 py,
285 PyBytes::new(py, &get_bytes_from_path(&file)).into_object(),
285 PyBytes::new(py, &get_bytes_from_path(file)).into_object(),
286 286 ),
287 287 }
288 288 }
289 289
290 290 Ok(PyTuple::new(
291 291 py,
292 292 &[
293 293 unsure.into_object(),
294 294 modified.into_object(),
295 295 added.into_object(),
296 296 removed.into_object(),
297 297 deleted.into_object(),
298 298 clean.into_object(),
299 299 ignored.into_object(),
300 300 unknown.into_object(),
301 301 py_warnings.into_object(),
302 302 bad.into_object(),
303 303 traversed.into_object(),
304 304 dirty.into_object(),
305 305 ][..],
306 306 ))
307 307 }
@@ -1,524 +1,524 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 PyRevision,
12 12 };
13 13 use cpython::{
14 14 buffer::{Element, PyBuffer},
15 15 exc::{IndexError, ValueError},
16 16 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
17 17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
18 18 };
19 19 use hg::{
20 20 nodemap::{Block, NodeMapError, NodeTree},
21 21 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
22 22 BaseRevision, Revision, UncheckedRevision,
23 23 };
24 24 use std::cell::RefCell;
25 25
26 26 /// Return a Struct implementing the Graph trait
27 27 pub(crate) fn pyindex_to_graph(
28 28 py: Python,
29 29 index: PyObject,
30 30 ) -> PyResult<cindex::Index> {
31 31 match index.extract::<MixedIndex>(py) {
32 32 Ok(midx) => Ok(midx.clone_cindex(py)),
33 33 Err(_) => cindex::Index::new(py, index),
34 34 }
35 35 }
36 36
37 37 py_class!(pub class MixedIndex |py| {
38 38 data cindex: RefCell<cindex::Index>;
39 39 data nt: RefCell<Option<NodeTree>>;
40 40 data docket: RefCell<Option<PyObject>>;
41 41 // Holds a reference to the mmap'ed persistent nodemap data
42 42 data mmap: RefCell<Option<PyBuffer>>;
43 43
44 44 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
45 45 Self::new(py, cindex)
46 46 }
47 47
48 48 /// Compatibility layer used for Python consumers needing access to the C index
49 49 ///
50 50 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
51 51 /// that may need to build a custom `nodetree`, based on a specified revset.
52 52 /// With a Rust implementation of the nodemap, we will be able to get rid of
53 53 /// this, by exposing our own standalone nodemap class,
54 54 /// ready to accept `MixedIndex`.
55 55 def get_cindex(&self) -> PyResult<PyObject> {
56 56 Ok(self.cindex(py).borrow().inner().clone_ref(py))
57 57 }
58 58
59 59 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
60 60
61 61 /// Return Revision if found, raises a bare `error.RevlogError`
62 62 /// in case of ambiguity, same as C version does
63 63 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
64 64 let opt = self.get_nodetree(py)?.borrow();
65 65 let nt = opt.as_ref().unwrap();
66 66 let idx = &*self.cindex(py).borrow();
67 67 let node = node_from_py_bytes(py, &node)?;
68 68 let res = nt.find_bin(idx, node.into());
69 69 Ok(res.map_err(|e| nodemap_error(py, e))?.map(Into::into))
70 70 }
71 71
72 72 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
73 73 /// is not found.
74 74 ///
75 75 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
76 76 /// will catch and rewrap with it
77 77 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
78 78 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
79 79 }
80 80
81 81 /// return True if the node exists in the index
82 82 def has_node(&self, node: PyBytes) -> PyResult<bool> {
83 83 self.get_rev(py, node).map(|opt| opt.is_some())
84 84 }
85 85
86 86 /// find length of shortest hex nodeid of a binary ID
87 87 def shortest(&self, node: PyBytes) -> PyResult<usize> {
88 88 let opt = self.get_nodetree(py)?.borrow();
89 89 let nt = opt.as_ref().unwrap();
90 90 let idx = &*self.cindex(py).borrow();
91 91 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
92 92 {
93 93 Ok(Some(l)) => Ok(l),
94 94 Ok(None) => Err(revlog_error(py)),
95 95 Err(e) => Err(nodemap_error(py, e)),
96 96 }
97 97 }
98 98
99 99 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
100 100 let opt = self.get_nodetree(py)?.borrow();
101 101 let nt = opt.as_ref().unwrap();
102 102 let idx = &*self.cindex(py).borrow();
103 103
104 104 let node_as_string = if cfg!(feature = "python3-sys") {
105 105 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
106 106 }
107 107 else {
108 108 let node = node.extract::<PyBytes>(py)?;
109 109 String::from_utf8_lossy(node.data(py)).to_string()
110 110 };
111 111
112 112 let prefix = NodePrefix::from_hex(&node_as_string)
113 113 .map_err(|_| PyErr::new::<ValueError, _>(
114 114 py, format!("Invalid node or prefix '{}'", node_as_string))
115 115 )?;
116 116
117 117 nt.find_bin(idx, prefix)
118 118 // TODO make an inner API returning the node directly
119 119 .map(|opt| opt.map(
120 120 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
121 121 .map_err(|e| nodemap_error(py, e))
122 122
123 123 }
124 124
125 125 /// append an index entry
126 126 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
127 127 if tup.len(py) < 8 {
128 128 // this is better than the panic promised by tup.get_item()
129 129 return Err(
130 130 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
131 131 }
132 132 let node_bytes = tup.get_item(py, 7).extract(py)?;
133 133 let node = node_from_py_object(py, &node_bytes)?;
134 134
135 135 let mut idx = self.cindex(py).borrow_mut();
136 136
137 137 // This is ok since we will just add the revision to the index
138 138 let rev = Revision(idx.len() as BaseRevision);
139 139 idx.append(py, tup)?;
140 140
141 141 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
142 142 .insert(&*idx, &node, rev)
143 143 .map_err(|e| nodemap_error(py, e))?;
144 144 Ok(py.None())
145 145 }
146 146
147 147 def __delitem__(&self, key: PyObject) -> PyResult<()> {
148 148 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
149 149 self.cindex(py).borrow().inner().del_item(py, key)?;
150 150 let mut opt = self.get_nodetree(py)?.borrow_mut();
151 151 let nt = opt.as_mut().unwrap();
152 152 nt.invalidate_all();
153 153 self.fill_nodemap(py, nt)?;
154 154 Ok(())
155 155 }
156 156
157 157 //
158 158 // Reforwarded C index API
159 159 //
160 160
161 161 // index_methods (tp_methods). Same ordering as in revlog.c
162 162
163 163 /// return the gca set of the given revs
164 164 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
165 165 self.call_cindex(py, "ancestors", args, kw)
166 166 }
167 167
168 168 /// return the heads of the common ancestors of the given revs
169 169 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
170 170 self.call_cindex(py, "commonancestorsheads", args, kw)
171 171 }
172 172
173 173 /// Clear the index caches and inner py_class data.
174 174 /// It is Python's responsibility to call `update_nodemap_data` again.
175 175 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
176 176 self.nt(py).borrow_mut().take();
177 177 self.docket(py).borrow_mut().take();
178 178 self.mmap(py).borrow_mut().take();
179 179 self.call_cindex(py, "clearcaches", args, kw)
180 180 }
181 181
182 182 /// return the raw binary string representing a revision
183 183 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
184 184 self.call_cindex(py, "entry_binary", args, kw)
185 185 }
186 186
187 187 /// return a binary packed version of the header
188 188 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
189 189 self.call_cindex(py, "pack_header", args, kw)
190 190 }
191 191
192 192 /// get an index entry
193 193 def get(&self, *args, **kw) -> PyResult<PyObject> {
194 194 self.call_cindex(py, "get", args, kw)
195 195 }
196 196
197 197 /// compute phases
198 198 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
199 199 self.call_cindex(py, "computephasesmapsets", args, kw)
200 200 }
201 201
202 202 /// reachableroots
203 203 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
204 204 self.call_cindex(py, "reachableroots2", args, kw)
205 205 }
206 206
207 207 /// get head revisions
208 208 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
209 209 self.call_cindex(py, "headrevs", args, kw)
210 210 }
211 211
212 212 /// get filtered head revisions
213 213 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
214 214 self.call_cindex(py, "headrevsfiltered", args, kw)
215 215 }
216 216
217 217 /// True if the object is a snapshot
218 218 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
219 219 self.call_cindex(py, "issnapshot", args, kw)
220 220 }
221 221
222 222 /// Gather snapshot data in a cache dict
223 223 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
224 224 self.call_cindex(py, "findsnapshots", args, kw)
225 225 }
226 226
227 227 /// determine revisions with deltas to reconstruct fulltext
228 228 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
229 229 self.call_cindex(py, "deltachain", args, kw)
230 230 }
231 231
232 232 /// slice planned chunk read to reach a density threshold
233 233 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
234 234 self.call_cindex(py, "slicechunktodensity", args, kw)
235 235 }
236 236
237 237 /// stats for the index
238 238 def stats(&self, *args, **kw) -> PyResult<PyObject> {
239 239 self.call_cindex(py, "stats", args, kw)
240 240 }
241 241
242 242 // index_sequence_methods and index_mapping_methods.
243 243 //
244 244 // Since we call back through the high level Python API,
245 245 // there's no point making a distinction between index_get
246 246 // and index_getitem.
247 247
248 248 def __len__(&self) -> PyResult<usize> {
249 249 self.cindex(py).borrow().inner().len(py)
250 250 }
251 251
252 252 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
253 253 // this conversion seems needless, but that's actually because
254 254 // `index_getitem` does not handle conversion from PyLong,
255 255 // which expressions such as [e for e in index] internally use.
256 256 // Note that we don't seem to have a direct way to call
257 257 // PySequence_GetItem (does the job), which would possibly be better
258 258 // for performance
259 259 let key = match key.extract::<i32>(py) {
260 260 Ok(rev) => rev.to_py_object(py).into_object(),
261 261 Err(_) => key,
262 262 };
263 263 self.cindex(py).borrow().inner().get_item(py, key)
264 264 }
265 265
266 266 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
267 267 self.cindex(py).borrow().inner().set_item(py, key, value)
268 268 }
269 269
270 270 def __contains__(&self, item: PyObject) -> PyResult<bool> {
271 271 // ObjectProtocol does not seem to provide contains(), so
272 272 // this is an equivalent implementation of the index_contains()
273 273 // defined in revlog.c
274 274 let cindex = self.cindex(py).borrow();
275 275 match item.extract::<i32>(py) {
276 276 Ok(rev) => {
277 277 Ok(rev >= -1 && rev < cindex.inner().len(py)? as BaseRevision)
278 278 }
279 279 Err(_) => {
280 280 cindex.inner().call_method(
281 281 py,
282 282 "has_node",
283 283 PyTuple::new(py, &[item]),
284 284 None)?
285 285 .extract(py)
286 286 }
287 287 }
288 288 }
289 289
290 290 def nodemap_data_all(&self) -> PyResult<PyBytes> {
291 291 self.inner_nodemap_data_all(py)
292 292 }
293 293
294 294 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
295 295 self.inner_nodemap_data_incremental(py)
296 296 }
297 297 def update_nodemap_data(
298 298 &self,
299 299 docket: PyObject,
300 300 nm_data: PyObject
301 301 ) -> PyResult<PyObject> {
302 302 self.inner_update_nodemap_data(py, docket, nm_data)
303 303 }
304 304
305 305 @property
306 306 def entry_size(&self) -> PyResult<PyInt> {
307 307 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
308 308 }
309 309
310 310 @property
311 311 def rust_ext_compat(&self) -> PyResult<PyInt> {
312 312 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
313 313 }
314 314
315 315 });
316 316
317 317 impl MixedIndex {
318 318 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
319 319 Self::create_instance(
320 320 py,
321 321 RefCell::new(cindex::Index::new(py, cindex)?),
322 322 RefCell::new(None),
323 323 RefCell::new(None),
324 324 RefCell::new(None),
325 325 )
326 326 }
327 327
328 328 /// This is scaffolding at this point, but it could also become
329 329 /// a way to start a persistent nodemap or perform a
330 330 /// vacuum / repack operation
331 331 fn fill_nodemap(
332 332 &self,
333 333 py: Python,
334 334 nt: &mut NodeTree,
335 335 ) -> PyResult<PyObject> {
336 336 let index = self.cindex(py).borrow();
337 337 for r in 0..index.len() {
338 338 let rev = Revision(r as BaseRevision);
339 339 // in this case node() won't ever return None
340 340 nt.insert(&*index, index.node(rev).unwrap(), rev)
341 341 .map_err(|e| nodemap_error(py, e))?
342 342 }
343 343 Ok(py.None())
344 344 }
345 345
346 346 fn get_nodetree<'a>(
347 347 &'a self,
348 348 py: Python<'a>,
349 349 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
350 350 if self.nt(py).borrow().is_none() {
351 let readonly = Box::new(Vec::new());
351 let readonly = Box::<Vec<_>>::default();
352 352 let mut nt = NodeTree::load_bytes(readonly, 0);
353 353 self.fill_nodemap(py, &mut nt)?;
354 354 self.nt(py).borrow_mut().replace(nt);
355 355 }
356 356 Ok(self.nt(py))
357 357 }
358 358
359 359 /// forward a method call to the underlying C index
360 360 fn call_cindex(
361 361 &self,
362 362 py: Python,
363 363 name: &str,
364 364 args: &PyTuple,
365 365 kwargs: Option<&PyDict>,
366 366 ) -> PyResult<PyObject> {
367 367 self.cindex(py)
368 368 .borrow()
369 369 .inner()
370 370 .call_method(py, name, args, kwargs)
371 371 }
372 372
373 373 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
374 374 self.cindex(py).borrow().clone_ref(py)
375 375 }
376 376
377 377 /// Returns the full nodemap bytes to be written as-is to disk
378 378 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
379 379 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
380 380 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
381 381
382 382 // If there's anything readonly, we need to build the data again from
383 383 // scratch
384 384 let bytes = if readonly.len() > 0 {
385 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
385 let mut nt = NodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
386 386 self.fill_nodemap(py, &mut nt)?;
387 387
388 388 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
389 389 assert_eq!(readonly.len(), 0);
390 390
391 391 bytes
392 392 } else {
393 393 bytes
394 394 };
395 395
396 396 let bytes = PyBytes::new(py, &bytes);
397 397 Ok(bytes)
398 398 }
399 399
400 400 /// Returns the last saved docket along with the size of any changed data
401 401 /// (in number of blocks), and said data as bytes.
402 402 fn inner_nodemap_data_incremental(
403 403 &self,
404 404 py: Python,
405 405 ) -> PyResult<PyObject> {
406 406 let docket = self.docket(py).borrow();
407 407 let docket = match docket.as_ref() {
408 408 Some(d) => d,
409 409 None => return Ok(py.None()),
410 410 };
411 411
412 412 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
413 413 let masked_blocks = node_tree.masked_readonly_blocks();
414 414 let (_, data) = node_tree.into_readonly_and_added_bytes();
415 415 let changed = masked_blocks * std::mem::size_of::<Block>();
416 416
417 417 Ok((docket, changed, PyBytes::new(py, &data))
418 418 .to_py_object(py)
419 419 .into_object())
420 420 }
421 421
422 422 /// Update the nodemap from the new (mmaped) data.
423 423 /// The docket is kept as a reference for later incremental calls.
424 424 fn inner_update_nodemap_data(
425 425 &self,
426 426 py: Python,
427 427 docket: PyObject,
428 428 nm_data: PyObject,
429 429 ) -> PyResult<PyObject> {
430 430 let buf = PyBuffer::get(py, &nm_data)?;
431 431 let len = buf.item_count();
432 432
433 433 // Build a slice from the mmap'ed buffer data
434 434 let cbuf = buf.buf_ptr();
435 435 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
436 436 && buf.is_c_contiguous()
437 437 && u8::is_compatible_format(buf.format())
438 438 {
439 439 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
440 440 } else {
441 441 return Err(PyErr::new::<ValueError, _>(
442 442 py,
443 443 "Nodemap data buffer has an invalid memory representation"
444 444 .to_string(),
445 445 ));
446 446 };
447 447
448 448 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
449 449 // pointer.
450 450 self.mmap(py).borrow_mut().replace(buf);
451 451
452 452 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
453 453
454 454 let data_tip = docket
455 455 .getattr(py, "tip_rev")?
456 456 .extract::<BaseRevision>(py)?
457 457 .into();
458 458 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
459 459 let idx = self.cindex(py).borrow();
460 460 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
461 461 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
462 462 })?;
463 463 let current_tip = idx.len();
464 464
465 465 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
466 466 let rev = Revision(r);
467 467 // in this case node() won't ever return None
468 468 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
469 469 .map_err(|e| nodemap_error(py, e))?
470 470 }
471 471
472 472 *self.nt(py).borrow_mut() = Some(nt);
473 473
474 474 Ok(py.None())
475 475 }
476 476 }
477 477
478 478 fn revlog_error(py: Python) -> PyErr {
479 479 match py
480 480 .import("mercurial.error")
481 481 .and_then(|m| m.get(py, "RevlogError"))
482 482 {
483 483 Err(e) => e,
484 484 Ok(cls) => PyErr::from_instance(
485 485 py,
486 486 cls.call(py, (py.None(),), None).ok().into_py_object(py),
487 487 ),
488 488 }
489 489 }
490 490
491 491 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
492 492 PyErr::new::<ValueError, _>(
493 493 py,
494 494 format!(
495 495 "Inconsistency: Revision {} found in nodemap \
496 496 is not in revlog index",
497 497 rev
498 498 ),
499 499 )
500 500 }
501 501
502 502 /// Standard treatment of NodeMapError
503 503 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
504 504 match err {
505 505 NodeMapError::MultipleResults => revlog_error(py),
506 506 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
507 507 }
508 508 }
509 509
510 510 /// Create the module, with __package__ given from parent
511 511 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
512 512 let dotted_name = &format!("{}.revlog", package);
513 513 let m = PyModule::new(py, dotted_name)?;
514 514 m.add(py, "__package__", package)?;
515 515 m.add(py, "__doc__", "RevLog - Rust implementations")?;
516 516
517 517 m.add_class::<MixedIndex>(py)?;
518 518
519 519 let sys = PyModule::import(py, "sys")?;
520 520 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
521 521 sys_modules.set_item(py, dotted_name, &m)?;
522 522
523 523 Ok(m)
524 524 }
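Besides the borrow cleanups, the revlog.rs hunks replace `Box::new(Vec::new())` with `Box::<Vec<_>>::default()`, which is the rewrite clippy's `box_default` lint suggests. A tiny standalone illustration of the two spellings (a hypothetical snippet, not taken from the Mercurial tree):

fn main() {
    // Flagged by clippy: build the default value, then box it.
    #[allow(clippy::box_default)]
    let old: Box<Vec<u8>> = Box::new(Vec::new());
    // Suggested spelling: ask `Box` for the default directly.
    let new: Box<Vec<u8>> = Box::<Vec<u8>>::default();
    assert_eq!(old, new);
}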
@@ -1,116 +1,116 b''
1 1 use crate::error::CommandError;
2 2 use clap::Arg;
3 3 use format_bytes::format_bytes;
4 4 use hg::operations::cat;
5 5 use hg::utils::hg_path::HgPathBuf;
6 6 use std::ffi::OsString;
7 7 use std::os::unix::prelude::OsStrExt;
8 8
9 9 pub const HELP_TEXT: &str = "
10 10 Output the current or given revision of files
11 11 ";
12 12
13 13 pub fn args() -> clap::Command {
14 14 clap::command!("cat")
15 15 .arg(
16 16 Arg::new("rev")
17 17 .help("search the repository as it is in REV")
18 18 .short('r')
19 19 .long("rev")
20 20 .value_name("REV"),
21 21 )
22 22 .arg(
23 23 clap::Arg::new("files")
24 24 .required(true)
25 25 .num_args(1..)
26 26 .value_name("FILE")
27 27 .value_parser(clap::value_parser!(std::ffi::OsString))
28 28 .help("Files to output"),
29 29 )
30 30 .about(HELP_TEXT)
31 31 }
32 32
33 33 #[logging_timer::time("trace")]
34 34 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
35 35 let cat_enabled = invocation.config.get_bool(b"rhg", b"cat")?;
36 36 if !cat_enabled {
37 37 return Err(CommandError::unsupported(
38 38 "cat is disabled in rhg (enable it with 'rhg.cat = true' \
39 39 or enable fallback with 'rhg.on-unsupported = fallback')",
40 40 ));
41 41 }
42 42
43 43 let rev = invocation.subcommand_args.get_one::<String>("rev");
44 44 let file_args =
45 45 match invocation.subcommand_args.get_many::<OsString>("files") {
46 46 Some(files) => files
47 47 .filter(|s| !s.is_empty())
48 48 .map(|s| s.as_os_str())
49 49 .collect(),
50 50 None => vec![],
51 51 };
52 52
53 53 let repo = invocation.repo?;
54 54 let cwd = hg::utils::current_dir()?;
55 55 let working_directory = repo.working_directory_path();
56 56 let working_directory = cwd.join(working_directory); // Make it absolute
57 57
58 58 let mut files = vec![];
59 59 for file in file_args {
60 60 if file.as_bytes().starts_with(b"set:") {
61 61 let message = "fileset";
62 62 return Err(CommandError::unsupported(message));
63 63 }
64 64
65 let normalized = cwd.join(&file);
65 let normalized = cwd.join(file);
66 66 // TODO: actually normalize `..` path segments etc?
67 67 let dotted = normalized.components().any(|c| c.as_os_str() == "..");
68 68 if file.as_bytes() == b"." || dotted {
69 69 let message = "`..` or `.` path segment";
70 70 return Err(CommandError::unsupported(message));
71 71 }
72 72 let relative_path = working_directory
73 73 .strip_prefix(&cwd)
74 74 .unwrap_or(&working_directory);
75 75 let stripped = normalized
76 76 .strip_prefix(&working_directory)
77 77 .map_err(|_| {
78 78 CommandError::abort(format!(
79 79 "abort: {} not under root '{}'\n(consider using '--cwd {}')",
80 80 String::from_utf8_lossy(file.as_bytes()),
81 81 working_directory.display(),
82 82 relative_path.display(),
83 83 ))
84 84 })?;
85 85 let hg_file = HgPathBuf::try_from(stripped.to_path_buf())
86 86 .map_err(|e| CommandError::abort(e.to_string()))?;
87 87 files.push(hg_file);
88 88 }
89 89 let files = files.iter().map(|file| file.as_ref()).collect();
90 90 // TODO probably move this to a util function like `repo.default_rev` or
91 91 // something when it's used somewhere else
92 92 let rev = match rev {
93 93 Some(r) => r.to_string(),
94 94 None => format!("{:x}", repo.dirstate_parents()?.p1),
95 95 };
96 96
97 97 let output = cat(repo, &rev, files).map_err(|e| (e, rev.as_str()))?;
98 98 for (_file, contents) in output.results {
99 99 invocation.ui.write_stdout(&contents)?;
100 100 }
101 101 if !output.missing.is_empty() {
102 102 let short = format!("{:x}", output.node.short()).into_bytes();
103 103 for path in &output.missing {
104 104 invocation.ui.write_stderr(&format_bytes!(
105 105 b"{}: no such file in rev {}\n",
106 106 path.as_bytes(),
107 107 short
108 108 ))?;
109 109 }
110 110 }
111 111 if output.found_any {
112 112 Ok(())
113 113 } else {
114 114 Err(CommandError::Unsuccessful)
115 115 }
116 116 }
@@ -1,71 +1,71 b''
1 1 use crate::error::CommandError;
2 2 use clap::Arg;
3 3 use clap::ArgGroup;
4 4 use hg::operations::{debug_data, DebugDataKind};
5 5
6 6 pub const HELP_TEXT: &str = "
7 7 Dump the contents of a data file revision
8 8 ";
9 9
10 10 pub fn args() -> clap::Command {
11 11 clap::command!("debugdata")
12 12 .arg(
13 13 Arg::new("changelog")
14 14 .help("open changelog")
15 15 .short('c')
16 16 .action(clap::ArgAction::SetTrue),
17 17 )
18 18 .arg(
19 19 Arg::new("manifest")
20 20 .help("open manifest")
21 21 .short('m')
22 22 .action(clap::ArgAction::SetTrue),
23 23 )
24 24 .group(
25 25 ArgGroup::new("revlog")
26 .args(&["changelog", "manifest"])
26 .args(["changelog", "manifest"])
27 27 .required(true),
28 28 )
29 29 .arg(
30 30 Arg::new("rev")
31 31 .help("revision")
32 32 .required(true)
33 33 .value_name("REV"),
34 34 )
35 35 .about(HELP_TEXT)
36 36 }
37 37
38 38 #[logging_timer::time("trace")]
39 39 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
40 40 let args = invocation.subcommand_args;
41 41 let rev = args
42 42 .get_one::<String>("rev")
43 43 .expect("rev should be a required argument");
44 44 let kind = match (
45 45 args.get_one::<bool>("changelog").unwrap(),
46 46 args.get_one::<bool>("manifest").unwrap(),
47 47 ) {
48 48 (true, false) => DebugDataKind::Changelog,
49 49 (false, true) => DebugDataKind::Manifest,
50 50 (true, true) => {
51 51 unreachable!("Should not happen since options are exclusive")
52 52 }
53 53 (false, false) => {
54 54 unreachable!("Should not happen since options are required")
55 55 }
56 56 };
57 57
58 58 let repo = invocation.repo?;
59 59 if repo.has_narrow() {
60 60 return Err(CommandError::unsupported(
61 61 "support for ellipsis nodes is missing and repo has narrow enabled",
62 62 ));
63 63 }
64 64 let data = debug_data(repo, rev, kind).map_err(|e| (e, rev.as_ref()))?;
65 65
66 66 let mut stdout = invocation.ui.stdout_buffer();
67 67 stdout.write_all(&data)?;
68 68 stdout.flush()?;
69 69
70 70 Ok(())
71 71 }
@@ -1,28 +1,28 b''
1 1 use crate::error::CommandError;
2 2 use format_bytes::format_bytes;
3 3 use hg::errors::{IoErrorContext, IoResultExt};
4 4 use hg::utils::files::get_bytes_from_path;
5 5
6 6 pub const HELP_TEXT: &str = "
7 7 Print the root directory of the current repository.
8 8
9 9 Returns 0 on success.
10 10 ";
11 11
12 12 pub fn args() -> clap::Command {
13 13 clap::command!("root").about(HELP_TEXT)
14 14 }
15 15
16 16 pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
17 17 let repo = invocation.repo?;
18 18 let working_directory = repo.working_directory_path();
19 19 let working_directory = std::fs::canonicalize(working_directory)
20 20 .with_context(|| {
21 21 IoErrorContext::CanonicalizingPath(working_directory.to_owned())
22 22 })?;
23 let bytes = get_bytes_from_path(&working_directory);
23 let bytes = get_bytes_from_path(working_directory);
24 24 invocation
25 25 .ui
26 26 .write_stdout(&format_bytes!(b"{}\n", bytes.as_slice()))?;
27 27 Ok(())
28 28 }
@@ -1,842 +1,842 b''
1 1 extern crate log;
2 2 use crate::error::CommandError;
3 3 use crate::ui::{local_to_utf8, Ui};
4 4 use clap::{command, Arg, ArgMatches};
5 5 use format_bytes::{format_bytes, join};
6 6 use hg::config::{Config, ConfigSource, PlainInfo};
7 7 use hg::repo::{Repo, RepoError};
8 8 use hg::utils::files::{get_bytes_from_os_str, get_path_from_bytes};
9 9 use hg::utils::SliceExt;
10 10 use hg::{exit_codes, requirements};
11 11 use std::borrow::Cow;
12 12 use std::collections::HashSet;
13 13 use std::ffi::OsString;
14 14 use std::os::unix::prelude::CommandExt;
15 15 use std::path::PathBuf;
16 16 use std::process::Command;
17 17
18 18 mod blackbox;
19 19 mod color;
20 20 mod error;
21 21 mod ui;
22 22 pub mod utils {
23 23 pub mod path_utils;
24 24 }
25 25
26 26 fn main_with_result(
27 27 argv: Vec<OsString>,
28 28 process_start_time: &blackbox::ProcessStartTime,
29 29 ui: &ui::Ui,
30 30 repo: Result<&Repo, &NoRepoInCwdError>,
31 31 config: &Config,
32 32 ) -> Result<(), CommandError> {
33 33 check_unsupported(config, repo)?;
34 34
35 35 let app = command!()
36 36 .subcommand_required(true)
37 37 .arg(
38 38 Arg::new("repository")
39 39 .help("repository root directory")
40 40 .short('R')
41 41 .value_name("REPO")
42 42 // Both ok: `hg -R ./foo log` or `hg log -R ./foo`
43 43 .global(true),
44 44 )
45 45 .arg(
46 46 Arg::new("config")
47 47 .help("set/override config option (use 'section.name=value')")
48 48 .value_name("CONFIG")
49 49 .global(true)
50 50 .long("config")
51 51 // Ok: `--config section.key1=val --config section.key2=val2`
52 52 // Not ok: `--config section.key1=val section.key2=val2`
53 53 .action(clap::ArgAction::Append),
54 54 )
55 55 .arg(
56 56 Arg::new("cwd")
57 57 .help("change working directory")
58 58 .value_name("DIR")
59 59 .long("cwd")
60 60 .global(true),
61 61 )
62 62 .arg(
63 63 Arg::new("color")
64 64 .help("when to colorize (boolean, always, auto, never, or debug)")
65 65 .value_name("TYPE")
66 66 .long("color")
67 67 .global(true),
68 68 )
69 69 .version("0.0.1");
70 70 let app = add_subcommand_args(app);
71 71
72 72 let matches = app.try_get_matches_from(argv.iter())?;
73 73
74 74 let (subcommand_name, subcommand_args) =
75 75 matches.subcommand().expect("subcommand required");
76 76
77 77 // Mercurial allows users to define "defaults" for commands, fallback
78 78 // if a default is detected for the current command
79 79 let defaults = config.get_str(b"defaults", subcommand_name.as_bytes())?;
80 80 match defaults {
81 81 // Programmatic usage might set defaults to an empty string to unset
82 82 // it; allow that
83 83 None | Some("") => {}
84 84 Some(_) => {
85 85 let msg = "`defaults` config set";
86 86 return Err(CommandError::unsupported(msg));
87 87 }
88 88 }
89 89
90 90 for prefix in ["pre", "post", "fail"].iter() {
91 91 // Mercurial allows users to define generic hooks for commands,
92 92 // fallback if any are detected
93 93 let item = format!("{}-{}", prefix, subcommand_name);
94 94 let hook_for_command =
95 95 config.get_str_no_default(b"hooks", item.as_bytes())?;
96 96 if hook_for_command.is_some() {
97 97 let msg = format!("{}-{} hook defined", prefix, subcommand_name);
98 98 return Err(CommandError::unsupported(msg));
99 99 }
100 100 }
101 101 let run = subcommand_run_fn(subcommand_name)
102 102 .expect("unknown subcommand name from clap despite Command::subcommand_required");
103 103
104 104 let invocation = CliInvocation {
105 105 ui,
106 106 subcommand_args,
107 107 config,
108 108 repo,
109 109 };
110 110
111 111 if let Ok(repo) = repo {
112 112 // We don't support subrepos; fall back if the subrepos file is present
113 113 if repo.working_directory_vfs().join(".hgsub").exists() {
114 114 let msg = "subrepos (.hgsub is present)";
115 115 return Err(CommandError::unsupported(msg));
116 116 }
117 117 }
118 118
119 119 if config.is_extension_enabled(b"blackbox") {
120 120 let blackbox =
121 121 blackbox::Blackbox::new(&invocation, process_start_time)?;
122 122 blackbox.log_command_start(argv.iter());
123 123 let result = run(&invocation);
124 124 blackbox.log_command_end(
125 125 argv.iter(),
126 126 exit_code(
127 127 &result,
128 128 // TODO: show a warning or combine with original error if
129 129 // `get_bool` returns an error
130 130 config
131 131 .get_bool(b"ui", b"detailed-exit-code")
132 132 .unwrap_or(false),
133 133 ),
134 134 );
135 135 result
136 136 } else {
137 137 run(&invocation)
138 138 }
139 139 }
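To recap the per-command fallback triggers above (the `[defaults]` section and the generic `pre-`/`post-`/`fail-` hooks), here is a sketch of the same lookups for a hypothetical `status` invocation; `status_needs_python_fallback` is an illustrative helper, not an rhg API:

    use hg::config::Config;
    use hg::errors::HgError;

    /// Sketch only: true when rhg would defer to Python `hg` for `status`
    /// because of a non-empty `defaults.status` value or any pre/post/fail hook.
    fn status_needs_python_fallback(config: &Config) -> Result<bool, HgError> {
        let has_defaults =
            matches!(config.get_str(b"defaults", b"status")?, Some(v) if !v.is_empty());
        let mut has_hook = false;
        for prefix in ["pre", "post", "fail"] {
            let item = format!("{}-status", prefix);
            has_hook |= config
                .get_str_no_default(b"hooks", item.as_bytes())?
                .is_some();
        }
        Ok(has_defaults || has_hook)
    }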
140 140
141 141 fn rhg_main(argv: Vec<OsString>) -> ! {
142 142 // Run this first, before we find out if the blackbox extension is even
143 143 // enabled, in order to include everything in-between in the duration
144 144 // measurements. Reading config files can be slow if they’re on NFS.
145 145 let process_start_time = blackbox::ProcessStartTime::now();
146 146
147 147 env_logger::init();
148 148
149 149 // Make sure nothing in a future version of `rhg` sets the global
150 150 // threadpool before we can cap default threads. (This is also called
151 151 // in core because Python uses the same code path; we're adding a
152 152 // redundant check.)
153 153 hg::utils::cap_default_rayon_threads()
154 154 .expect("Rayon threadpool already initialized");
155 155
156 156 let early_args = EarlyArgs::parse(&argv);
157 157
158 158 let initial_current_dir = early_args.cwd.map(|cwd| {
159 159 let cwd = get_path_from_bytes(&cwd);
160 160 std::env::current_dir()
161 161 .and_then(|initial| {
162 162 std::env::set_current_dir(cwd)?;
163 163 Ok(initial)
164 164 })
165 165 .unwrap_or_else(|error| {
166 166 exit(
167 167 &argv,
168 168 &None,
169 169 &Ui::new_infallible(&Config::empty()),
170 170 OnUnsupported::Abort,
171 171 Err(CommandError::abort(format!(
172 172 "abort: {}: '{}'",
173 173 error,
174 174 cwd.display()
175 175 ))),
176 176 false,
177 177 )
178 178 })
179 179 });
180 180
181 181 let mut non_repo_config =
182 182 Config::load_non_repo().unwrap_or_else(|error| {
183 183 // Normally this is decided based on config, but we don’t have that
184 184 // available. As of this writing config loading never returns an
185 185 // "unsupported" error but that is not enforced by the type system.
186 186 let on_unsupported = OnUnsupported::Abort;
187 187
188 188 exit(
189 189 &argv,
190 190 &initial_current_dir,
191 191 &Ui::new_infallible(&Config::empty()),
192 192 on_unsupported,
193 193 Err(error.into()),
194 194 false,
195 195 )
196 196 });
197 197
198 198 non_repo_config
199 199 .load_cli_args(early_args.config, early_args.color)
200 200 .unwrap_or_else(|error| {
201 201 exit(
202 202 &argv,
203 203 &initial_current_dir,
204 204 &Ui::new_infallible(&non_repo_config),
205 205 OnUnsupported::from_config(&non_repo_config),
206 206 Err(error.into()),
207 207 non_repo_config
208 208 .get_bool(b"ui", b"detailed-exit-code")
209 209 .unwrap_or(false),
210 210 )
211 211 });
212 212
213 213 if let Some(repo_path_bytes) = &early_args.repo {
214 214 lazy_static::lazy_static! {
215 215 static ref SCHEME_RE: regex::bytes::Regex =
216 216 // Same as `_matchscheme` in `mercurial/util.py`
217 217 regex::bytes::Regex::new("^[a-zA-Z0-9+.\\-]+:").unwrap();
218 218 }
219 219 if SCHEME_RE.is_match(repo_path_bytes) {
220 220 exit(
221 221 &argv,
222 222 &initial_current_dir,
223 223 &Ui::new_infallible(&non_repo_config),
224 224 OnUnsupported::from_config(&non_repo_config),
225 225 Err(CommandError::UnsupportedFeature {
226 226 message: format_bytes!(
227 227 b"URL-like --repository {}",
228 228 repo_path_bytes
229 229 ),
230 230 }),
231 231 // TODO: show a warning or combine with original error if
232 232 // `get_bool` returns an error
233 233 non_repo_config
234 234 .get_bool(b"ui", b"detailed-exit-code")
235 235 .unwrap_or(false),
236 236 )
237 237 }
238 238 }
239 239 let repo_arg = early_args.repo.unwrap_or_default();
240 240 let repo_path: Option<PathBuf> = {
241 241 if repo_arg.is_empty() {
242 242 None
243 243 } else {
244 244 let local_config = {
245 245 if std::env::var_os("HGRCSKIPREPO").is_none() {
246 246 // TODO: handle errors from find_repo_root
247 247 if let Ok(current_dir_path) = Repo::find_repo_root() {
248 248 let config_files = vec![
249 249 ConfigSource::AbsPath(
250 250 current_dir_path.join(".hg/hgrc"),
251 251 ),
252 252 ConfigSource::AbsPath(
253 253 current_dir_path.join(".hg/hgrc-not-shared"),
254 254 ),
255 255 ];
256 256 // TODO: handle errors from
257 257 // `load_from_explicit_sources`
258 258 Config::load_from_explicit_sources(config_files).ok()
259 259 } else {
260 260 None
261 261 }
262 262 } else {
263 263 None
264 264 }
265 265 };
266 266
267 267 let non_repo_config_val = {
268 268 let non_repo_val = non_repo_config.get(b"paths", &repo_arg);
269 269 match &non_repo_val {
270 270 Some(val) if !val.is_empty() => home::home_dir()
271 271 .unwrap_or_else(|| PathBuf::from("~"))
272 272 .join(get_path_from_bytes(val))
273 273 .canonicalize()
274 274 // TODO: handle error and make it similar to python
275 275 // implementation maybe?
276 276 .ok(),
277 277 _ => None,
278 278 }
279 279 };
280 280
281 281 let config_val = match &local_config {
282 282 None => non_repo_config_val,
283 283 Some(val) => {
284 284 let local_config_val = val.get(b"paths", &repo_arg);
285 285 match &local_config_val {
286 286 Some(val) if !val.is_empty() => {
287 287 // The presence of a local_config
288 288 // ensures that current_dir
289 289 // won't result in an error
290 290 let canpath = hg::utils::current_dir()
291 291 .unwrap()
292 292 .join(get_path_from_bytes(val))
293 293 .canonicalize();
294 294 canpath.ok().or(non_repo_config_val)
295 295 }
296 296 _ => non_repo_config_val,
297 297 }
298 298 }
299 299 };
300 300 config_val
301 301 .or_else(|| Some(get_path_from_bytes(&repo_arg).to_path_buf()))
302 302 }
303 303 };
304 304
305 305 let simple_exit =
306 306 |ui: &Ui, config: &Config, result: Result<(), CommandError>| -> ! {
307 307 exit(
308 308 &argv,
309 309 &initial_current_dir,
310 310 ui,
311 311 OnUnsupported::from_config(config),
312 312 result,
313 313 // TODO: show a warning or combine with original error if
314 314 // `get_bool` returns an error
315 315 non_repo_config
316 316 .get_bool(b"ui", b"detailed-exit-code")
317 317 .unwrap_or(false),
318 318 )
319 319 };
320 320 let early_exit = |config: &Config, error: CommandError| -> ! {
321 321 simple_exit(&Ui::new_infallible(config), config, Err(error))
322 322 };
323 323 let repo_result = match Repo::find(&non_repo_config, repo_path.to_owned())
324 324 {
325 325 Ok(repo) => Ok(repo),
326 326 Err(RepoError::NotFound { at }) if repo_path.is_none() => {
327 327 // Not finding a repo is not fatal yet, if `-R` was not given
328 328 Err(NoRepoInCwdError { cwd: at })
329 329 }
330 330 Err(error) => early_exit(&non_repo_config, error.into()),
331 331 };
332 332
333 333 let config = if let Ok(repo) = &repo_result {
334 334 repo.config()
335 335 } else {
336 336 &non_repo_config
337 337 };
338 338
339 339 let mut config_cow = Cow::Borrowed(config);
340 340 config_cow.to_mut().apply_plain(PlainInfo::from_env());
341 341 if !ui::plain(Some("tweakdefaults"))
342 342 && config_cow
343 343 .as_ref()
344 344 .get_bool(b"ui", b"tweakdefaults")
345 345 .unwrap_or_else(|error| early_exit(config, error.into()))
346 346 {
347 347 config_cow.to_mut().tweakdefaults()
348 348 };
349 349 let config = config_cow.as_ref();
350 350 let ui = Ui::new(config)
351 351 .unwrap_or_else(|error| early_exit(config, error.into()));
352 352
353 353 if let Ok(true) = config.get_bool(b"rhg", b"fallback-immediately") {
354 354 exit(
355 355 &argv,
356 356 &initial_current_dir,
357 357 &ui,
358 358 OnUnsupported::fallback(config),
359 359 Err(CommandError::unsupported(
360 360 "`rhg.fallback-immediately is true`",
361 361 )),
362 362 false,
363 363 )
364 364 }
365 365
366 366 let result = main_with_result(
367 367 argv.iter().map(|s| s.to_owned()).collect(),
368 368 &process_start_time,
369 369 &ui,
370 370 repo_result.as_ref(),
371 371 config,
372 372 );
373 373 simple_exit(&ui, config, result)
374 374 }
375 375
376 376 fn main() -> ! {
377 377 rhg_main(std::env::args_os().collect())
378 378 }
379 379
380 380 fn exit_code(
381 381 result: &Result<(), CommandError>,
382 382 use_detailed_exit_code: bool,
383 383 ) -> i32 {
384 384 match result {
385 385 Ok(()) => exit_codes::OK,
386 386 Err(CommandError::Abort {
387 387 detailed_exit_code, ..
388 388 }) => {
389 389 if use_detailed_exit_code {
390 390 *detailed_exit_code
391 391 } else {
392 392 exit_codes::ABORT
393 393 }
394 394 }
395 395 Err(CommandError::Unsuccessful) => exit_codes::UNSUCCESSFUL,
396 396 // Exit with a specific code and no error message to let a potential
397 397 // wrapper script fallback to Python-based Mercurial.
398 398 Err(CommandError::UnsupportedFeature { .. }) => {
399 399 exit_codes::UNIMPLEMENTED
400 400 }
401 401 Err(CommandError::InvalidFallback { .. }) => {
402 402 exit_codes::INVALID_FALLBACK
403 403 }
404 404 }
405 405 }
406 406
407 fn exit<'a>(
408 original_args: &'a [OsString],
407 fn exit(
408 original_args: &[OsString],
409 409 initial_current_dir: &Option<PathBuf>,
410 410 ui: &Ui,
411 411 mut on_unsupported: OnUnsupported,
412 412 result: Result<(), CommandError>,
413 413 use_detailed_exit_code: bool,
414 414 ) -> ! {
415 415 if let (
416 416 OnUnsupported::Fallback { executable },
417 417 Err(CommandError::UnsupportedFeature { message }),
418 418 ) = (&on_unsupported, &result)
419 419 {
420 420 let mut args = original_args.iter();
421 421 let executable = match executable {
422 422 None => {
423 423 exit_no_fallback(
424 424 ui,
425 425 OnUnsupported::Abort,
426 426 Err(CommandError::abort(
427 427 "abort: 'rhg.on-unsupported=fallback' without \
428 428 'rhg.fallback-executable' set.",
429 429 )),
430 430 false,
431 431 );
432 432 }
433 433 Some(executable) => executable,
434 434 };
435 435 let executable_path = get_path_from_bytes(executable);
436 436 let this_executable = args.next().expect("expected argv[0] to exist");
437 437 if executable_path == *this_executable {
438 438 // Avoid spawning infinitely many processes until resource
439 439 // exhaustion.
440 440 let _ = ui.write_stderr(&format_bytes!(
441 441 b"Blocking recursive fallback. The 'rhg.fallback-executable = {}' config \
442 442 points to `rhg` itself.\n",
443 443 executable
444 444 ));
445 445 on_unsupported = OnUnsupported::Abort
446 446 } else {
447 447 log::debug!("falling back (see trace-level log)");
448 448 log::trace!("{}", local_to_utf8(message));
449 449 if let Err(err) = which::which(executable_path) {
450 450 exit_no_fallback(
451 451 ui,
452 452 OnUnsupported::Abort,
453 453 Err(CommandError::InvalidFallback {
454 454 path: executable.to_owned(),
455 455 err: err.to_string(),
456 456 }),
457 457 use_detailed_exit_code,
458 458 )
459 459 }
460 460 // `args` is now `argv[1..]` since we’ve already consumed
461 461 // `argv[0]`
462 462 let mut command = Command::new(executable_path);
463 463 command.args(args);
464 464 if let Some(initial) = initial_current_dir {
465 465 command.current_dir(initial);
466 466 }
467 467 // We don't use subprocess because proper signal handling is harder
468 468 // and we don't want to keep `rhg` around after a fallback anyway.
469 469 // For example, if `rhg` is run in the background and falls back to
470 470 // `hg` which, in turn, waits for a signal, we'll get stuck if
471 471 // we're doing plain subprocess.
472 472 //
473 473 // If `exec` returns, we can only assume our process is very broken
474 474 // (see its documentation), so only try to forward the error code
475 475 // when exiting.
476 476 let err = command.exec();
477 477 std::process::exit(
478 478 err.raw_os_error().unwrap_or(exit_codes::ABORT),
479 479 );
480 480 }
481 481 }
482 482 exit_no_fallback(ui, on_unsupported, result, use_detailed_exit_code)
483 483 }
484 484
485 485 fn exit_no_fallback(
486 486 ui: &Ui,
487 487 on_unsupported: OnUnsupported,
488 488 result: Result<(), CommandError>,
489 489 use_detailed_exit_code: bool,
490 490 ) -> ! {
491 491 match &result {
492 492 Ok(_) => {}
493 493 Err(CommandError::Unsuccessful) => {}
494 494 Err(CommandError::Abort { message, hint, .. }) => {
494 494 // Ignore errors when writing to stderr; we’re already exiting
496 496 // with failure code so there’s not much more we can do.
497 497 if !message.is_empty() {
498 498 let _ = ui.write_stderr(&format_bytes!(b"{}\n", message));
499 499 }
500 500 if let Some(hint) = hint {
501 501 let _ = ui.write_stderr(&format_bytes!(b"({})\n", hint));
502 502 }
503 503 }
504 504 Err(CommandError::UnsupportedFeature { message }) => {
505 505 match on_unsupported {
506 506 OnUnsupported::Abort => {
507 507 let _ = ui.write_stderr(&format_bytes!(
508 508 b"unsupported feature: {}\n",
509 509 message
510 510 ));
511 511 }
512 512 OnUnsupported::AbortSilent => {}
513 513 OnUnsupported::Fallback { .. } => unreachable!(),
514 514 }
515 515 }
516 516 Err(CommandError::InvalidFallback { path, err }) => {
517 517 let _ = ui.write_stderr(&format_bytes!(
518 518 b"abort: invalid fallback '{}': {}\n",
519 519 path,
520 520 err.as_bytes(),
521 521 ));
522 522 }
523 523 }
524 524 std::process::exit(exit_code(&result, use_detailed_exit_code))
525 525 }
526 526
527 527 macro_rules! subcommands {
528 528 ($( $command: ident )+) => {
529 529 mod commands {
530 530 $(
531 531 pub mod $command;
532 532 )+
533 533 }
534 534
535 535 fn add_subcommand_args(app: clap::Command) -> clap::Command {
536 536 app
537 537 $(
538 538 .subcommand(commands::$command::args())
539 539 )+
540 540 }
541 541
542 542 pub type RunFn = fn(&CliInvocation) -> Result<(), CommandError>;
543 543
544 544 fn subcommand_run_fn(name: &str) -> Option<RunFn> {
545 545 match name {
546 546 $(
547 547 stringify!($command) => Some(commands::$command::run),
548 548 )+
549 549 _ => None,
550 550 }
551 551 }
552 552 };
553 553 }
554 554
555 555 subcommands! {
556 556 cat
557 557 debugdata
558 558 debugrequirements
559 559 debugignorerhg
560 560 debugrhgsparse
561 561 files
562 562 root
563 563 config
564 564 status
565 565 }
566 566
567 567 pub struct CliInvocation<'a> {
568 568 ui: &'a Ui,
569 569 subcommand_args: &'a ArgMatches,
570 570 config: &'a Config,
571 571 /// References inside `Result` are a bit peculiar, but they allow
572 572 /// `invocation.repo?` to work with `&CliInvocation`, since this
573 573 /// `Result` type is `Copy`.
574 574 repo: Result<&'a Repo, &'a NoRepoInCwdError>,
575 575 }
576 576
577 577 struct NoRepoInCwdError {
578 578 cwd: PathBuf,
579 579 }
580 580
581 581 /// CLI arguments to be parsed "early" in order to be able to read
582 582 /// configuration before using Clap. Ideally we would also use Clap for this,
583 583 /// see <https://github.com/clap-rs/clap/discussions/2366>.
584 584 ///
585 585 /// These arguments are still declared when we do use Clap later, so that Clap
586 586 /// does not return an error for their presence.
587 587 struct EarlyArgs {
588 588 /// Values of all `--config` arguments. (Possibly none)
589 589 config: Vec<Vec<u8>>,
590 590 /// Value of all the `--color` argument, if any.
591 591 color: Option<Vec<u8>>,
592 592 /// Value of the `-R` or `--repository` argument, if any.
593 593 repo: Option<Vec<u8>>,
594 594 /// Value of the `--cwd` argument, if any.
595 595 cwd: Option<Vec<u8>>,
596 596 }
597 597
598 598 impl EarlyArgs {
599 599 fn parse<'a>(args: impl IntoIterator<Item = &'a OsString>) -> Self {
600 600 let mut args = args.into_iter().map(get_bytes_from_os_str);
601 601 let mut config = Vec::new();
602 602 let mut color = None;
603 603 let mut repo = None;
604 604 let mut cwd = None;
605 605 // Use `while let` instead of `for` so that we can also call
606 606 // `args.next()` inside the loop.
607 607 while let Some(arg) = args.next() {
608 608 if arg == b"--config" {
609 609 if let Some(value) = args.next() {
610 610 config.push(value)
611 611 }
612 612 } else if let Some(value) = arg.drop_prefix(b"--config=") {
613 613 config.push(value.to_owned())
614 614 }
615 615
616 616 if arg == b"--color" {
617 617 if let Some(value) = args.next() {
618 618 color = Some(value)
619 619 }
620 620 } else if let Some(value) = arg.drop_prefix(b"--color=") {
621 621 color = Some(value.to_owned())
622 622 }
623 623
624 624 if arg == b"--cwd" {
625 625 if let Some(value) = args.next() {
626 626 cwd = Some(value)
627 627 }
628 628 } else if let Some(value) = arg.drop_prefix(b"--cwd=") {
629 629 cwd = Some(value.to_owned())
630 630 }
631 631
632 632 if arg == b"--repository" || arg == b"-R" {
633 633 if let Some(value) = args.next() {
634 634 repo = Some(value)
635 635 }
636 636 } else if let Some(value) = arg.drop_prefix(b"--repository=") {
637 637 repo = Some(value.to_owned())
638 638 } else if let Some(value) = arg.drop_prefix(b"-R") {
639 639 repo = Some(value.to_owned())
640 640 }
641 641 }
642 642 Self {
643 643 config,
644 644 color,
645 645 repo,
646 646 cwd,
647 647 }
648 648 }
649 649 }
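As a concrete illustration of the early parsing above, here is how a hypothetical command line would be split before Clap ever runs (a sketch assuming it lives next to the definitions, since the fields are private to this module):

    use std::ffi::OsString;

    fn main() {
        // Hypothetical command line; `-R/tmp/repo` exercises the `-R`-prefix form.
        let argv: Vec<OsString> =
            ["rhg", "--config", "ui.color=no", "-R/tmp/repo", "status"]
                .into_iter()
                .map(OsString::from)
                .collect();
        let early = EarlyArgs::parse(&argv);
        assert_eq!(early.config, vec![b"ui.color=no".to_vec()]);
        assert_eq!(early.repo, Some(b"/tmp/repo".to_vec()));
        assert!(early.color.is_none() && early.cwd.is_none());
    }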
650 650
651 651 /// What to do when encountering some unsupported feature.
652 652 ///
653 653 /// See `HgError::UnsupportedFeature` and `CommandError::UnsupportedFeature`.
654 654 enum OnUnsupported {
655 655 /// Print an error message describing what feature is not supported,
656 656 /// and exit with code 252.
657 657 Abort,
658 658 /// Silently exit with code 252.
659 659 AbortSilent,
660 660 /// Try running a Python implementation
661 661 Fallback { executable: Option<Vec<u8>> },
662 662 }
663 663
664 664 impl OnUnsupported {
665 665 const DEFAULT: Self = OnUnsupported::Abort;
666 666
667 667 fn fallback_executable(config: &Config) -> Option<Vec<u8>> {
668 668 config
669 669 .get(b"rhg", b"fallback-executable")
670 670 .map(|x| x.to_owned())
671 671 }
672 672
673 673 fn fallback(config: &Config) -> Self {
674 674 OnUnsupported::Fallback {
675 675 executable: Self::fallback_executable(config),
676 676 }
677 677 }
678 678
679 679 fn from_config(config: &Config) -> Self {
680 680 match config
681 681 .get(b"rhg", b"on-unsupported")
682 682 .map(|value| value.to_ascii_lowercase())
683 683 .as_deref()
684 684 {
685 685 Some(b"abort") => OnUnsupported::Abort,
686 686 Some(b"abort-silent") => OnUnsupported::AbortSilent,
687 687 Some(b"fallback") => Self::fallback(config),
688 688 None => Self::DEFAULT,
689 689 Some(_) => {
690 690 // TODO: warn about unknown config value
691 691 Self::DEFAULT
692 692 }
693 693 }
694 694 }
695 695 }
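For orientation, the two config knobs involved are `rhg.on-unsupported`, which picks the policy (the value is compared after ASCII-lowercasing), and `rhg.fallback-executable`, which is only consulted for the fallback policy. A sketch restating `from_config` in words; `describe_policy` is illustrative, not an rhg API:

    use hg::config::Config;

    /// Sketch only: a readable restatement of OnUnsupported::from_config.
    fn describe_policy(config: &Config) -> String {
        match config
            .get(b"rhg", b"on-unsupported")
            .map(|v| v.to_ascii_lowercase())
            .as_deref()
        {
            Some(b"abort-silent") => "exit 252 with no message".to_string(),
            Some(b"fallback") => format!(
                "exec the executable named by rhg.fallback-executable ({})",
                config
                    .get(b"rhg", b"fallback-executable")
                    .map(|p| String::from_utf8_lossy(p).into_owned())
                    .unwrap_or_else(|| "unset".to_string())
            ),
            // `abort`, an unset value, and unknown values all use the default.
            _ => "print the unsupported feature and exit 252".to_string(),
        }
    }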
696 696
697 697 /// The `*` extension is an edge-case for config sub-options that apply to all
698 698 /// extensions. For now, only `:required` exists, but that may change in the
699 699 /// future.
700 700 const SUPPORTED_EXTENSIONS: &[&[u8]] = &[
701 701 b"blackbox",
702 702 b"share",
703 703 b"sparse",
704 704 b"narrow",
705 705 b"*",
706 706 b"strip",
707 707 b"rebase",
708 708 ];
709 709
710 710 fn check_extensions(config: &Config) -> Result<(), CommandError> {
711 711 if let Some(b"*") = config.get(b"rhg", b"ignored-extensions") {
712 712 // All extensions are to be ignored, nothing to do here
713 713 return Ok(());
714 714 }
715 715
716 716 let enabled: HashSet<&[u8]> = config
717 717 .iter_section(b"extensions")
718 718 .filter_map(|(extension, value)| {
719 719 if value == b"!" {
720 720 // Filter out disabled extensions
721 721 return None;
722 722 }
723 723 // Ignore extension suboptions. Only `required` exists for now.
724 724 // `rhg` either supports an extension or doesn't, so it doesn't
725 725 // make sense to consider the loading of an extension.
726 726 let actual_extension =
727 727 extension.split_2(b':').unwrap_or((extension, b"")).0;
728 728 Some(actual_extension)
729 729 })
730 730 .collect();
731 731
732 732 let mut unsupported = enabled;
733 733 for supported in SUPPORTED_EXTENSIONS {
734 734 unsupported.remove(supported);
735 735 }
736 736
737 737 if let Some(ignored_list) = config.get_list(b"rhg", b"ignored-extensions")
738 738 {
739 739 for ignored in ignored_list {
740 740 unsupported.remove(ignored.as_slice());
741 741 }
742 742 }
743 743
744 744 if unsupported.is_empty() {
745 745 Ok(())
746 746 } else {
747 747 let mut unsupported: Vec<_> = unsupported.into_iter().collect();
748 748 // Sort the extensions to get a stable output
749 749 unsupported.sort();
750 750 Err(CommandError::UnsupportedFeature {
751 751 message: format_bytes!(
752 752 b"extensions: {} (consider adding them to 'rhg.ignored-extensions' config)",
753 753 join(unsupported, b", ")
754 754 ),
755 755 })
756 756 }
757 757 }
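The `extension:sub-option` keys mentioned in the comment above are normalized with `split_2` before the supported-set check; a tiny sketch of that normalization (the `rebase:required` key is illustrative):

    use hg::utils::SliceExt;

    fn main() {
        let key: &[u8] = b"rebase:required";
        // Keep only the extension name; the sub-option (here `required`) is ignored.
        let actual_extension = key.split_2(b':').unwrap_or((key, b"")).0;
        assert_eq!(actual_extension, b"rebase");
    }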
758 758
759 759 /// Array of tuples of (auto upgrade conf, feature conf, local requirement)
760 760 #[allow(clippy::type_complexity)]
761 761 const AUTO_UPGRADES: &[((&str, &str), (&str, &str), &str)] = &[
762 762 (
763 763 ("format", "use-share-safe.automatic-upgrade-of-mismatching-repositories"),
764 764 ("format", "use-share-safe"),
765 765 requirements::SHARESAFE_REQUIREMENT,
766 766 ),
767 767 (
768 768 ("format", "use-dirstate-tracked-hint.automatic-upgrade-of-mismatching-repositories"),
769 769 ("format", "use-dirstate-tracked-hint"),
770 770 requirements::DIRSTATE_TRACKED_HINT_V1,
771 771 ),
772 772 (
773 773 ("format", "use-dirstate-v2.automatic-upgrade-of-mismatching-repositories"),
774 774 ("format", "use-dirstate-v2"),
775 775 requirements::DIRSTATE_V2_REQUIREMENT,
776 776 ),
777 777 ];
778 778
779 779 /// Mercurial allows users to automatically upgrade their repository.
780 780 /// `rhg` does not have the ability to upgrade yet, so fall back if an upgrade
781 781 /// is needed.
782 782 fn check_auto_upgrade(
783 783 config: &Config,
784 784 reqs: &HashSet<String>,
785 785 ) -> Result<(), CommandError> {
786 786 for (upgrade_conf, feature_conf, local_req) in AUTO_UPGRADES.iter() {
787 787 let auto_upgrade = config
788 788 .get_bool(upgrade_conf.0.as_bytes(), upgrade_conf.1.as_bytes())?;
789 789
790 790 if auto_upgrade {
791 791 let want_it = config.get_bool(
792 792 feature_conf.0.as_bytes(),
793 793 feature_conf.1.as_bytes(),
794 794 )?;
795 795 let have_it = reqs.contains(*local_req);
796 796
797 797 let action = match (want_it, have_it) {
798 798 (true, false) => Some("upgrade"),
799 799 (false, true) => Some("downgrade"),
800 800 _ => None,
801 801 };
802 802 if let Some(action) = action {
803 803 let message = format!(
804 804 "automatic {} {}.{}",
805 805 action, upgrade_conf.0, upgrade_conf.1
806 806 );
807 807 return Err(CommandError::unsupported(message));
808 808 }
809 809 }
810 810 }
811 811 Ok(())
812 812 }
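The upgrade/downgrade decision above reduces to a small truth table over (config wants the feature, requirement already present); a sketch for reference, with `pending_action` as an illustrative name:

    /// Sketch only: the (want_it, have_it) decision made in check_auto_upgrade.
    fn pending_action(want_it: bool, have_it: bool) -> Option<&'static str> {
        match (want_it, have_it) {
            (true, false) => Some("upgrade"),   // feature wanted, requirement missing
            (false, true) => Some("downgrade"), // feature disabled, requirement present
            _ => None,                          // already in sync: rhg can proceed
        }
    }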
813 813
814 814 fn check_unsupported(
815 815 config: &Config,
816 816 repo: Result<&Repo, &NoRepoInCwdError>,
817 817 ) -> Result<(), CommandError> {
818 818 check_extensions(config)?;
819 819
820 820 if std::env::var_os("HG_PENDING").is_some() {
821 821 // TODO: only if the value is `== repo.working_directory`?
822 822 // What about relative v.s. absolute paths?
823 823 Err(CommandError::unsupported("$HG_PENDING"))?
824 824 }
825 825
826 826 if let Ok(repo) = repo {
827 827 if repo.has_subrepos()? {
828 828 Err(CommandError::unsupported("sub-repositories"))?
829 829 }
830 830 check_auto_upgrade(config, repo.requirements())?;
831 831 }
832 832
833 833 if config.has_non_empty_section(b"encode") {
834 834 Err(CommandError::unsupported("[encode] config"))?
835 835 }
836 836
837 837 if config.has_non_empty_section(b"decode") {
838 838 Err(CommandError::unsupported("[decode] config"))?
839 839 }
840 840
841 841 Ok(())
842 842 }
@@ -1,307 +1,307 b''
1 1 use crate::color::ColorConfig;
2 2 use crate::color::Effect;
3 3 use crate::error::CommandError;
4 4 use format_bytes::format_bytes;
5 5 use format_bytes::write_bytes;
6 6 use hg::config::Config;
7 7 use hg::config::PlainInfo;
8 8 use hg::errors::HgError;
9 9 use hg::repo::Repo;
10 10 use hg::sparse;
11 11 use hg::utils::files::get_bytes_from_path;
12 12 use hg::PatternFileWarning;
13 13 use std::borrow::Cow;
14 14 use std::io;
15 15 use std::io::{ErrorKind, Write};
16 16
17 17 pub struct Ui {
18 18 stdout: std::io::Stdout,
19 19 stderr: std::io::Stderr,
20 20 colors: Option<ColorConfig>,
21 21 }
22 22
23 23 /// The kind of user interface error
24 24 pub enum UiError {
25 25 /// The standard output stream cannot be written to
26 26 StdoutError(io::Error),
27 27 /// The standard error stream cannot be written to
28 28 StderrError(io::Error),
29 29 }
30 30
31 31 /// The commandline user interface
32 32 impl Ui {
33 33 pub fn new(config: &Config) -> Result<Self, HgError> {
34 34 Ok(Ui {
35 35 // If using something else, also adapt `isatty()` below.
36 36 stdout: std::io::stdout(),
37 37
38 38 stderr: std::io::stderr(),
39 39 colors: ColorConfig::new(config)?,
40 40 })
41 41 }
42 42
43 43 /// Default to no color if color configuration errors.
44 44 ///
45 45 /// Useful when we’re already handling another error.
46 46 pub fn new_infallible(config: &Config) -> Self {
47 47 Ui {
48 48 // If using something else, also adapt `isatty()` below.
49 49 stdout: std::io::stdout(),
50 50
51 51 stderr: std::io::stderr(),
52 52 colors: ColorConfig::new(config).unwrap_or(None),
53 53 }
54 54 }
55 55
56 56 /// Returns a buffered handle on stdout for faster batch printing
57 57 /// operations.
58 58 pub fn stdout_buffer(&self) -> StdoutBuffer<std::io::StdoutLock> {
59 59 StdoutBuffer::new(self.stdout.lock())
60 60 }
61 61
62 62 /// Write bytes to stdout
63 63 pub fn write_stdout(&self, bytes: &[u8]) -> Result<(), UiError> {
64 64 let mut stdout = self.stdout.lock();
65 65
66 66 stdout.write_all(bytes).or_else(handle_stdout_error)?;
67 67
68 68 stdout.flush().or_else(handle_stdout_error)
69 69 }
70 70
71 71 /// Write bytes to stderr
72 72 pub fn write_stderr(&self, bytes: &[u8]) -> Result<(), UiError> {
73 73 let mut stderr = self.stderr.lock();
74 74
75 75 stderr.write_all(bytes).or_else(handle_stderr_error)?;
76 76
77 77 stderr.flush().or_else(handle_stderr_error)
78 78 }
79 79
80 80 /// Write bytes to stdout with the given label
81 81 ///
82 82 /// Like the optional `label` parameter in `mercurial/ui.py`,
83 83 /// this label influences the color used for this output.
84 84 pub fn write_stdout_labelled(
85 85 &self,
86 86 bytes: &[u8],
87 87 label: &str,
88 88 ) -> Result<(), UiError> {
89 89 if let Some(colors) = &self.colors {
90 90 if let Some(effects) = colors.styles.get(label.as_bytes()) {
91 91 if !effects.is_empty() {
92 92 return self
93 93 .write_stdout_with_effects(bytes, effects)
94 94 .or_else(handle_stdout_error);
95 95 }
96 96 }
97 97 }
98 98 self.write_stdout(bytes)
99 99 }
100 100
101 101 fn write_stdout_with_effects(
102 102 &self,
103 103 bytes: &[u8],
104 104 effects: &[Effect],
105 105 ) -> io::Result<()> {
106 106 let stdout = &mut self.stdout.lock();
107 107 let mut write_line = |line: &[u8], first: bool| {
108 108 // `line` does not include the newline delimiter
109 109 if !first {
110 110 stdout.write_all(b"\n")?;
111 111 }
112 112 if line.is_empty() {
113 113 return Ok(());
114 114 }
115 115 /// 0x1B == 27 == 0o33
116 116 const ASCII_ESCAPE: &[u8] = b"\x1b";
117 117 write_bytes!(stdout, b"{}[0", ASCII_ESCAPE)?;
118 118 for effect in effects {
119 119 write_bytes!(stdout, b";{}", effect)?;
120 120 }
121 121 write_bytes!(stdout, b"m")?;
122 122 stdout.write_all(line)?;
123 123 write_bytes!(stdout, b"{}[0m", ASCII_ESCAPE)
124 124 };
125 125 let mut lines = bytes.split(|&byte| byte == b'\n');
126 126 if let Some(first) = lines.next() {
127 127 write_line(first, true)?;
128 128 for line in lines {
129 129 write_line(line, false)?
130 130 }
131 131 }
132 132 stdout.flush()
133 133 }
134 134 }
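For a single non-empty line and a single effect, `write_stdout_with_effects` brackets the line with a standard SGR escape sequence; a sketch of the resulting bytes, assuming an effect that renders as `31` (red foreground):

    fn main() {
        let mut written = Vec::new();
        written.extend_from_slice(b"\x1b[0");  // ESC [ 0
        written.extend_from_slice(b";31");     // one effect
        written.extend_from_slice(b"m");
        written.extend_from_slice(b"changed"); // the line itself
        written.extend_from_slice(b"\x1b[0m"); // reset
        assert_eq!(written.as_slice(), b"\x1b[0;31mchanged\x1b[0m".as_slice());
    }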
135 135
136 136 // TODO: pass the PlainInfo to call sites directly and
137 137 // delete this function
138 138 pub fn plain(opt_feature: Option<&str>) -> bool {
139 139 let plain_info = PlainInfo::from_env();
140 140 match opt_feature {
141 141 None => plain_info.is_plain(),
142 142 Some(feature) => plain_info.is_feature_plain(feature),
143 143 }
144 144 }
145 145
146 146 /// A buffered stdout writer for faster batch printing operations.
147 147 pub struct StdoutBuffer<W: Write> {
148 148 buf: io::BufWriter<W>,
149 149 }
150 150
151 151 impl<W: Write> StdoutBuffer<W> {
152 152 pub fn new(writer: W) -> Self {
153 153 let buf = io::BufWriter::new(writer);
154 154 Self { buf }
155 155 }
156 156
157 157 /// Write bytes to stdout buffer
158 158 pub fn write_all(&mut self, bytes: &[u8]) -> Result<(), UiError> {
159 159 self.buf.write_all(bytes).or_else(handle_stdout_error)
160 160 }
161 161
162 162 /// Flush bytes to stdout
163 163 pub fn flush(&mut self) -> Result<(), UiError> {
164 164 self.buf.flush().or_else(handle_stdout_error)
165 165 }
166 166 }
167 167
168 168 /// Sometimes writing to stdout is not possible; try writing to stderr to
169 169 /// signal that failure, otherwise just bail.
170 170 fn handle_stdout_error(error: io::Error) -> Result<(), UiError> {
171 171 if let ErrorKind::BrokenPipe = error.kind() {
172 172 // This makes `| head` work for example
173 173 return Ok(());
174 174 }
175 175 let mut stderr = io::stderr();
176 176
177 177 stderr
178 178 .write_all(&format_bytes!(
179 179 b"abort: {}\n",
180 180 error.to_string().as_bytes()
181 181 ))
182 182 .map_err(UiError::StderrError)?;
183 183
184 184 stderr.flush().map_err(UiError::StderrError)?;
185 185
186 186 Err(UiError::StdoutError(error))
187 187 }
188 188
189 189 /// Sometimes writing to stderr is not possible.
190 190 fn handle_stderr_error(error: io::Error) -> Result<(), UiError> {
191 191 // A broken pipe should not result in an error
192 192 // like with `| head` for example
193 193 if let ErrorKind::BrokenPipe = error.kind() {
194 194 return Ok(());
195 195 }
196 196 Err(UiError::StderrError(error))
197 197 }
198 198
199 199 /// Encode rust strings according to the user system.
200 200 pub fn utf8_to_local(s: &str) -> Cow<[u8]> {
201 201 // TODO encode for the user's system
202 202 let bytes = s.as_bytes();
203 203 Cow::Borrowed(bytes)
204 204 }
205 205
206 206 /// Decode user system bytes to Rust string.
207 207 pub fn local_to_utf8(s: &[u8]) -> Cow<str> {
208 208 // TODO decode from the user's system
209 209 String::from_utf8_lossy(s)
210 210 }
211 211
212 212 /// Should formatted output be used?
213 213 ///
214 214 /// Note: rhg does not have the formatter mechanism yet,
215 215 /// but this is also used when deciding whether to use color.
216 216 pub fn formatted(config: &Config) -> Result<bool, HgError> {
217 217 if let Some(formatted) = config.get_option(b"ui", b"formatted")? {
218 218 Ok(formatted)
219 219 } else {
220 220 isatty(config)
221 221 }
222 222 }
223 223
224 224 pub enum RelativePaths {
225 225 Legacy,
226 226 Bool(bool),
227 227 }
228 228
229 229 pub fn relative_paths(config: &Config) -> Result<RelativePaths, HgError> {
230 230 Ok(match config.get(b"ui", b"relative-paths") {
231 231 None | Some(b"legacy") => RelativePaths::Legacy,
232 232 _ => RelativePaths::Bool(config.get_bool(b"ui", b"relative-paths")?),
233 233 })
234 234 }
235 235
236 236 fn isatty(config: &Config) -> Result<bool, HgError> {
237 237 Ok(if config.get_bool(b"ui", b"nontty")? {
238 238 false
239 239 } else {
240 240 atty::is(atty::Stream::Stdout)
241 241 })
242 242 }
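Put together, `formatted` and `isatty` implement a simple precedence: an explicit `ui.formatted` wins; otherwise output counts as formatted only when stdout is a tty and `ui.nontty` is unset. A sketch of that decision with the inputs made explicit (`formatted_decision` is an illustrative name):

    /// Sketch only: the precedence implemented by formatted() and isatty() above.
    fn formatted_decision(
        ui_formatted: Option<bool>,
        nontty: bool,
        stdout_is_tty: bool,
    ) -> bool {
        match ui_formatted {
            Some(explicit) => explicit,
            None => !nontty && stdout_is_tty,
        }
    }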
243 243
244 244 /// Return the formatted bytestring corresponding to a pattern file warning,
245 245 /// as expected by the CLI.
246 246 pub(crate) fn format_pattern_file_warning(
247 247 warning: &PatternFileWarning,
248 248 repo: &Repo,
249 249 ) -> Vec<u8> {
250 250 match warning {
251 251 PatternFileWarning::InvalidSyntax(path, syntax) => format_bytes!(
252 252 b"{}: ignoring invalid syntax '{}'\n",
253 253 get_bytes_from_path(path),
254 &*syntax
254 syntax
255 255 ),
256 256 PatternFileWarning::NoSuchFile(path) => {
257 257 let path = if let Ok(relative) =
258 258 path.strip_prefix(repo.working_directory_path())
259 259 {
260 260 relative
261 261 } else {
262 &*path
262 path
263 263 };
264 264 format_bytes!(
265 265 b"skipping unreadable pattern file '{}': \
266 266 No such file or directory\n",
267 267 get_bytes_from_path(path),
268 268 )
269 269 }
270 270 }
271 271 }
272 272
273 273 /// Print with `Ui` the formatted bytestring corresponding to a
274 274 /// sparse/narrow warning, as expected by the CLI.
275 275 pub(crate) fn print_narrow_sparse_warnings(
276 276 narrow_warnings: &[sparse::SparseWarning],
277 277 sparse_warnings: &[sparse::SparseWarning],
278 278 ui: &Ui,
279 279 repo: &Repo,
280 280 ) -> Result<(), CommandError> {
281 281 for warning in narrow_warnings.iter().chain(sparse_warnings) {
282 282 match &warning {
283 283 sparse::SparseWarning::RootWarning { context, line } => {
284 284 let msg = format_bytes!(
285 285 b"warning: {} profile cannot use paths \"
286 286 starting with /, ignoring {}\n",
287 287 context,
288 288 line
289 289 );
290 290 ui.write_stderr(&msg)?;
291 291 }
292 292 sparse::SparseWarning::ProfileNotFound { profile, rev } => {
293 293 let msg = format_bytes!(
294 294 b"warning: sparse profile '{}' not found \"
295 295 in rev {} - ignoring it\n",
296 296 profile,
297 297 rev
298 298 );
299 299 ui.write_stderr(&msg)?;
300 300 }
301 301 sparse::SparseWarning::Pattern(e) => {
302 302 ui.write_stderr(&format_pattern_file_warning(e, repo))?;
303 303 }
304 304 }
305 305 }
306 306 Ok(())
307 307 }