# HG changeset patch # User Raphaël Gomès # Date 2020-12-29 09:53:45 # Node ID 95d6f31e88db40fec8bc456be8a1e287751db66a # Parent 5f27924a201d94bd82a14eb048578a39f8d7406c hg-core: add basic config module The config module exposes a `Config` struct, unused for now. It only reads the config file local to the repository, but handles all valid patterns and includes/unsets. It is structured in layers instead of erasing by reverse order of precedence, allowing us to transparently know more about the config for debugging purposes, and potentially other things I haven't thought about yet. This change also introduces `format_bytes!` to `hg-core`. Differential Revision: https://phab.mercurial-scm.org/D9408 diff --git a/rust/hg-core/src/config.rs b/rust/hg-core/src/config.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/config.rs @@ -0,0 +1,14 @@ +// config.rs +// +// Copyright 2020 +// Valentin Gatien-Baron, +// Raphaël Gomès +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Mercurial config parsing and interfaces. + +mod config; +mod layer; +pub use config::Config; diff --git a/rust/hg-core/src/config/config.rs b/rust/hg-core/src/config/config.rs new file mode 100644 --- /dev/null +++ b/rust/hg-core/src/config/config.rs @@ -0,0 +1,197 @@ +// config.rs +// +// Copyright 2020 +// Valentin Gatien-Baron, +// Raphaël Gomès +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. 
+
+use super::layer;
+use crate::config::layer::{ConfigError, ConfigLayer, ConfigValue};
+use std::path::PathBuf;
+
+use crate::operations::find_root;
+use crate::utils::files::read_whole_file;
+
+/// Holds the config values for the current repository
+/// TODO update this docstring once we support more sources
+pub struct Config {
+    layers: Vec<layer::ConfigLayer>,
+}
+
+impl std::fmt::Debug for Config {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for (index, layer) in self.layers.iter().rev().enumerate() {
+            write!(
+                f,
+                "==== Layer {} (trusted: {}) ====\n{:?}",
+                index, layer.trusted, layer
+            )?;
+        }
+        Ok(())
+    }
+}
+
+pub enum ConfigSource {
+    /// Absolute path to a config file
+    AbsPath(PathBuf),
+    /// Already parsed (from the CLI, env, Python resources, etc.)
+    Parsed(layer::ConfigLayer),
+}
+
+pub fn parse_bool(v: &[u8]) -> Option<bool> {
+    match v.to_ascii_lowercase().as_slice() {
+        b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true),
+        b"0" | b"no" | b"false" | b"off" | b"never" => Some(false),
+        _ => None,
+    }
+}
+
+impl Config {
+    /// Loads `sources` in order. Later sources take precedence over earlier
+    /// ones, because lookups walk the resulting layers from last to first.
+    pub fn load_from_explicit_sources(
+        sources: Vec<ConfigSource>,
+    ) -> Result<Self, ConfigError> {
+        let mut layers = vec![];
+
+        for source in sources.into_iter() {
+            match source {
+                ConfigSource::Parsed(c) => layers.push(c),
+                ConfigSource::AbsPath(c) => {
+                    // TODO check if it should be trusted
+                    // mercurial/ui.py:427
+                    let data = match read_whole_file(&c) {
+                        Err(_) => continue, // same as the python code
+                        Ok(data) => data,
+                    };
+                    layers.extend(ConfigLayer::parse(&c, &data)?)
+                }
+            }
+        }
+
+        Ok(Config { layers })
+    }
+
+    /// Loads the local config. In a future version, this will also load the
+    /// `$HOME/.hgrc` and more to mirror the Python implementation.
+    pub fn load() -> Result<Self, ConfigError> {
+        let root = find_root().unwrap();
+        Ok(Self::load_from_explicit_sources(vec![
+            ConfigSource::AbsPath(root.join(".hg/hgrc")),
+        ])?)
+    }
+
+    /// Returns an `Err` if the first value found is not a valid boolean.
+    /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if
+    /// found, or `None`.
+    pub fn get_option(
+        &self,
+        section: &[u8],
+        item: &[u8],
+    ) -> Result<Option<bool>, ConfigError> {
+        match self.get_inner(&section, &item) {
+            Some((layer, v)) => match parse_bool(&v.bytes) {
+                Some(b) => Ok(Some(b)),
+                None => Err(ConfigError::Parse {
+                    origin: layer.origin.to_owned(),
+                    line: v.line,
+                    bytes: v.bytes.to_owned(),
+                }),
+            },
+            None => Ok(None),
+        }
+    }
+
+    /// Returns the corresponding boolean in the config. Returns `Ok(false)`
+    /// if the value is not found, an `Err` if it's not a valid boolean.
+    pub fn get_bool(
+        &self,
+        section: &[u8],
+        item: &[u8],
+    ) -> Result<bool, ConfigError> {
+        Ok(self.get_option(section, item)?.unwrap_or(false))
+    }
+
+    /// Returns the raw value bytes of the first one found, or `None`.
+    pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
+        self.get_inner(section, item)
+            .map(|(_, value)| value.bytes.as_ref())
+    }
+
+    /// Returns the layer and the value of the first one found, or `None`.
+    fn get_inner(
+        &self,
+        section: &[u8],
+        item: &[u8],
+    ) -> Option<(&ConfigLayer, &ConfigValue)> {
+        for layer in self.layers.iter().rev() {
+            if !layer.trusted {
+                continue;
+            }
+            if let Some(v) = layer.get(&section, &item) {
+                return Some((&layer, v));
+            }
+        }
+        None
+    }
+
+    /// Get raw values bytes from all layers (even untrusted ones) in order
+    /// of precedence.
+    #[cfg(test)]
+    fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
+        let mut res = vec![];
+        for layer in self.layers.iter().rev() {
+            if let Some(v) = layer.get(&section, &item) {
+                res.push(v.bytes.as_ref());
+            }
+        }
+        res
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+    use std::fs::File;
+    use std::io::Write;
+
+    #[test]
+    fn test_include_layer_ordering() {
+        let tmpdir = tempfile::tempdir().unwrap();
+        let tmpdir_path = tmpdir.path();
+        let mut included_file =
+            File::create(&tmpdir_path.join("included.rc")).unwrap();
+
+        included_file.write_all(b"[section]\nitem=value1").unwrap();
+        let base_config_path = tmpdir_path.join("base.rc");
+        let mut config_file = File::create(&base_config_path).unwrap();
+        let data =
+            b"[section]\nitem=value0\n%include included.rc\nitem=value2";
+        config_file.write_all(data).unwrap();
+
+        let sources = vec![ConfigSource::AbsPath(base_config_path)];
+        let config = Config::load_from_explicit_sources(sources)
+            .expect("expected valid config");
+
+        dbg!(&config);
+
+        let (_, value) = config.get_inner(b"section", b"item").unwrap();
+        assert_eq!(
+            value,
+            &ConfigValue {
+                bytes: b"value2".to_vec(),
+                line: Some(4)
+            }
+        );
+
+        let value = config.get(b"section", b"item").unwrap();
+        assert_eq!(value, b"value2",);
+        assert_eq!(
+            config.get_all(b"section", b"item"),
+            [b"value2", b"value1", b"value0"]
+        );
+    }
+}
diff --git a/rust/hg-core/src/config/layer.rs b/rust/hg-core/src/config/layer.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/config/layer.rs
@@ -0,0 +1,268 @@
+// layer.rs
+//
+// Copyright 2020
+//      Valentin Gatien-Baron,
+//      Raphaël Gomès
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use crate::utils::files::{
+    get_bytes_from_path, get_path_from_bytes, read_whole_file,
+};
+use format_bytes::format_bytes;
+use lazy_static::lazy_static;
+use regex::bytes::Regex;
+use std::collections::HashMap;
+use std::io;
+use std::path::{Path, PathBuf};
+
+lazy_static! {
+    static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
+    static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
+    /// Continuation whitespace
+    static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
+    static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
+    static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
+    /// A directive that allows for removing previous entries
+    static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
+    /// A directive that allows for including other config files
+    static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
+}
+
+/// All config values separated by layers of precedence.
+/// Each config source may be split in multiple layers if `%include` directives
+/// are used.
+/// TODO detail the general precedence
+#[derive(Clone)]
+pub struct ConfigLayer {
+    /// Mapping of the sections to their items
+    sections: HashMap<Vec<u8>, ConfigItem>,
+    /// All sections (and their items/values) in a layer share the same origin
+    pub origin: ConfigOrigin,
+    /// Whether this layer comes from a trusted user or group
+    pub trusted: bool,
+}
+
+impl ConfigLayer {
+    pub fn new(origin: ConfigOrigin) -> Self {
+        ConfigLayer {
+            sections: HashMap::new(),
+            trusted: true, // TODO check
+            origin,
+        }
+    }
+
+    /// Add an entry to the config, overwriting the old one if already present.
+    pub fn add(
+        &mut self,
+        section: Vec<u8>,
+        item: Vec<u8>,
+        value: Vec<u8>,
+        line: Option<usize>,
+    ) {
+        self.sections
+            .entry(section)
+            .or_insert_with(|| HashMap::new())
+            .insert(item, ConfigValue { bytes: value, line });
+    }
+
+    /// Returns the config value in `<section>.<item>` if it exists
+    pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
+        Some(self.sections.get(section)?.get(item)?)
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.sections.is_empty()
+    }
+
+    /// Returns a `Vec` of layers in order of precedence (so, in read order),
+    /// recursively parsing the `%include` directives if any.
+    pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
+        let mut layers = vec![];
+
+        // Discard byte order mark if any
+        let data = if data.starts_with(b"\xef\xbb\xbf") {
+            &data[3..]
+        } else {
+            data
+        };
+
+        // TODO check if it's trusted
+        let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
+
+        let mut lines_iter =
+            data.split(|b| *b == b'\n').enumerate().peekable();
+        let mut section = b"".to_vec();
+
+        while let Some((index, bytes)) = lines_iter.next() {
+            if let Some(m) = INCLUDE_RE.captures(&bytes) {
+                let filename_bytes = &m[1];
+                let filename_to_include = get_path_from_bytes(&filename_bytes);
+                match read_include(&src, &filename_to_include) {
+                    (include_src, Ok(data)) => {
+                        layers.push(current_layer);
+                        layers.extend(Self::parse(&include_src, &data)?);
+                        current_layer =
+                            Self::new(ConfigOrigin::File(src.to_owned()));
+                    }
+                    (_, Err(e)) => {
+                        return Err(ConfigError::IncludeError {
+                            path: filename_to_include.to_owned(),
+                            io_error: e,
+                        })
+                    }
+                }
+            } else if let Some(_) = EMPTY_RE.captures(&bytes) {
+            } else if let Some(m) = SECTION_RE.captures(&bytes) {
+                section = m[1].to_vec();
+            } else if let Some(m) = ITEM_RE.captures(&bytes) {
+                let item = m[1].to_vec();
+                let mut value = m[2].to_vec();
+                loop {
+                    match lines_iter.peek() {
+                        None => break,
+                        Some((_, v)) => {
+                            if let Some(_) = COMMENT_RE.captures(&v) {
+                            } else if let Some(cont) = CONT_RE.captures(&v) {
+                                value.extend(b"\n");
+                                value.extend(&cont[1]);
+                            } else {
+                                break;
+                            }
+                        }
+                    };
+                    lines_iter.next();
+                }
+                current_layer.add(
+                    section.clone(),
+                    item,
+                    value,
+                    Some(index + 1),
+                );
+            } else if let Some(m) = UNSET_RE.captures(&bytes) {
+                if let Some(map) = current_layer.sections.get_mut(&section) {
+                    map.remove(&m[1]);
+                }
+            } else {
+                return Err(ConfigError::Parse {
+                    origin: ConfigOrigin::File(src.to_owned()),
+                    line: Some(index + 1),
+                    bytes: bytes.to_owned(),
+                });
+            }
+        }
+        if !current_layer.is_empty() {
+            layers.push(current_layer);
+        }
+        Ok(layers)
+    }
+}
+
+impl std::fmt::Debug for ConfigLayer {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut sections: Vec<_> = self.sections.iter().collect();
+        sections.sort_by(|e0, e1| e0.0.cmp(e1.0));
+
+        for (section, items) in sections.into_iter() {
+            let mut items: Vec<_> = items.into_iter().collect();
+            items.sort_by(|e0, e1| e0.0.cmp(e1.0));
+
+            for (item, config_entry) in items {
+                writeln!(
+                    f,
+                    "{}",
+                    String::from_utf8_lossy(&format_bytes!(
+                        b"{}.{}={} # {}",
+                        section,
+                        item,
+                        &config_entry.bytes,
+                        &self.origin.to_bytes(),
+                    ))
+                )?
+            }
+        }
+        Ok(())
+    }
+}
+
+/// Mapping of section item to value.
+/// In the following:
+/// ```text
+/// [ui]
+/// paginate=no
+/// ```
+/// "paginate" is the section item and "no" the value.
+pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
+
+#[derive(Clone, Debug, PartialEq)]
+pub struct ConfigValue {
+    /// The raw bytes of the value (be it from the CLI, env or from a file)
+    pub bytes: Vec<u8>,
+    /// Only present if the value comes from a file, 1-indexed.
+    pub line: Option<usize>,
+}
+
+#[derive(Clone, Debug)]
+pub enum ConfigOrigin {
+    /// The value comes from a configuration file
+    File(PathBuf),
+    /// The value comes from the environment like `$PAGER` or `$EDITOR`
+    Environment(Vec<u8>),
+    /* TODO cli
+     * TODO defaults (configitems.py)
+     * TODO extensions
+     * TODO Python resources?
+     * Others? */
+}
+
+impl ConfigOrigin {
+    /// TODO use some kind of dedicated trait?
+    pub fn to_bytes(&self) -> Vec<u8> {
+        match self {
+            ConfigOrigin::File(p) => get_bytes_from_path(p),
+            ConfigOrigin::Environment(e) => e.to_owned(),
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum ConfigError {
+    Parse {
+        origin: ConfigOrigin,
+        line: Option<usize>,
+        bytes: Vec<u8>,
+    },
+    /// Failed to include a sub config file
+    IncludeError {
+        path: PathBuf,
+        io_error: std::io::Error,
+    },
+    /// Any IO error that isn't expected
+    IO(std::io::Error),
+}
+
+impl From<std::io::Error> for ConfigError {
+    fn from(e: std::io::Error) -> Self {
+        Self::IO(e)
+    }
+}
+
+fn make_regex(pattern: &'static str) -> Regex {
+    Regex::new(pattern).expect("expected a valid regex")
+}
+
+/// Includes are relative to the file they're defined in, unless they're
+/// absolute.
+fn read_include(
+    old_src: &Path,
+    new_src: &Path,
+) -> (PathBuf, io::Result<Vec<u8>>) {
+    if new_src.is_absolute() {
+        (new_src.to_path_buf(), read_whole_file(&new_src))
+    } else {
+        let dir = old_src.parent().unwrap();
+        let new_src = dir.join(&new_src);
+        (new_src.to_owned(), read_whole_file(&new_src))
+    }
+}
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -26,6 +26,7 @@ pub mod matchers;
 pub mod repo;
 pub mod revlog;
 pub use revlog::*;
+pub mod config;
 pub mod operations;
 pub mod utils;
 
diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -18,6 +18,7 @@ use lazy_static::lazy_static;
 use same_file::is_same_file;
 use std::borrow::{Cow, ToOwned};
 use std::fs::Metadata;
+use std::io::Read;
 use std::iter::FusedIterator;
 use std::ops::Deref;
 use std::path::{Path, PathBuf};
@@ -308,6 +309,17 @@ pub fn relativize_path(path: &HgPath, cw
     }
 }
 
+/// Reads a file in one big chunk instead of doing multiple reads
+pub fn read_whole_file(filepath: &Path) -> std::io::Result<Vec<u8>> {
+    let mut file = std::fs::File::open(filepath)?;
+    let size = file.metadata()?.len();
+
+    let mut res = vec![0; size as usize];
+    file.read_exact(&mut res)?;
+
+    Ok(res)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;