upstream/mercurial-mirror Commit - r48762:6961eca0

rhg: Port Python’s `ui.configlist` as `Config::get_list`...

Simon Sapin -

r48762:6961eca0 default

parent child

tests/test-config-parselist.py

0 created 644 +52 0

@@ -0,0 +1,52 b''
	1	"""
	2	List-valued configuration keys have an ad-hoc microsyntax. From `hg help config`:
	3
	4	> List values are separated by whitespace or comma, except when values are
	5	> placed in double quotation marks:
	6	>
	7	> allow_read = "John Doe, PhD", brian, betty
	8	>
	9	> Quotation marks can be escaped by prefixing them with a backslash. Only
	10	> quotation marks at the beginning of a word is counted as a quotation
	11	> (e.g., ``foo"bar baz`` is the list of ``foo"bar`` and ``baz``).
	12
	13	That help documentation is fairly light on details, the actual parser has many
	14	other edge cases. This test tries to cover them.
	15	"""
	16
	17	from mercurial.utils import stringutil
	18
	19
	20	def assert_parselist(input, expected):
	21	result = stringutil.parselist(input)
	22	if result != expected:
	23	raise AssertionError(
	24	"parse_input(%r)\n got %r\nexpected %r"
	25	% (input, result, expected)
	26	)
	27
	28
	29	# Keep these Python tests in sync with the Rust ones in `rust/hg-core/src/config/values.rs`
	30
	31	assert_parselist(b'', [])
	32	assert_parselist(b',', [])
	33	assert_parselist(b'A', [b'A'])
	34	assert_parselist(b'B,B', [b'B', b'B'])
	35	assert_parselist(b', C, ,C,', [b'C', b'C'])
	36	assert_parselist(b'"', [b'"'])
	37	assert_parselist(b'""', [b'', b''])
	38	assert_parselist(b'D,"', [b'D', b'"'])
	39	assert_parselist(b'E,""', [b'E', b'', b''])
	40	assert_parselist(b'"F,F"', [b'F,F'])
	41	assert_parselist(b'"G,G', [b'"G', b'G'])
	42	assert_parselist(b'"H \\",\\"H', [b'"H', b',', b'H'])
	43	assert_parselist(b'I,I"', [b'I', b'I"'])
	44	assert_parselist(b'J,"J', [b'J', b'"J'])
	45	assert_parselist(b'K K', [b'K', b'K'])
	46	assert_parselist(b'"K" K', [b'K', b'K'])
	47	assert_parselist(b'L\tL', [b'L', b'L'])
	48	assert_parselist(b'"L"\tL', [b'L', b'', b'L'])
	49	assert_parselist(b'M\x0bM', [b'M', b'M'])
	50	assert_parselist(b'"M"\x0bM', [b'M', b'', b'M'])
	51	assert_parselist(b'"N" , ,"', [b'N"'])
	52	assert_parselist(b'" ,O, ', [b'"', b'O'])

rust/hg-core/src/config/config.rs

0 +10 0

             // config.rs
             //
             // Copyright 2020
             //      Valentin Gatien-Baron,
             //      Raphaël Gomès <rgomes@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             use super::layer;
             use super::values;
             use crate::config::layer::{
                 ConfigError, ConfigLayer, ConfigOrigin, ConfigValue,
             };
             use crate::utils::files::get_bytes_from_os_str;
             use crate::utils::SliceExt;
             use format_bytes::{write_bytes, DisplayBytes};
             use std::collections::HashSet;
             use std::env;
             use std::fmt;
             use std::path::{Path, PathBuf};
             use std::str;
             use crate::errors::{HgResultExt, IoResultExt};
             /// Holds the config values for the current repository
             /// TODO update this docstring once we support more sources
             ///
             /// The values are stored as a stack of layers. Lookups walk the layers
             /// from last to first and stop at the first trusted layer that defines
             /// the requested item, so a layer pushed later overrides earlier ones.
             #[derive(Clone)]
             pub struct Config {
                 // Ordered from lowest to highest precedence.
                 layers: Vec<layer::ConfigLayer>,
             }
             impl DisplayBytes for Config {
                 /// Writes every layer to `out`, last layer first, each one preceded by
                 /// a header line giving its position in that order and whether the
                 /// layer is trusted.
                 fn display_bytes(
                     &self,
                     out: &mut dyn std::io::Write,
                 ) -> std::io::Result<()> {
                     let newest_first = self.layers.iter().rev();
                     for (position, current) in newest_first.enumerate() {
                         let trusted = if current.trusted {
                             &b"yes"[..]
                         } else {
                             &b"no"[..]
                         };
                         write_bytes!(
                             out,
                             b"==== Layer {} (trusted: {}) ====\n{}",
                             position,
                             trusted,
                             current
                         )?;
                     }
                     Ok(())
                 }
             }
             /// One input of `Config::load_from_explicit_sources`: either a file that
             /// still has to be read and parsed, or a layer that is ready to use.
             pub enum ConfigSource {
                 /// Absolute path to a config file
                 AbsPath(PathBuf),
                 /// Already parsed (from the CLI, env, Python resources, etc.)
                 Parsed(layer::ConfigLayer),
             }
             /// Error produced by `Config::get_parse` when a value exists but cannot
             /// be converted to the requested type.
             #[derive(Debug)]
             pub struct ConfigValueParseError {
                 /// Origin of the layer the value was found in
                 pub origin: ConfigOrigin,
                 /// Line recorded for the value, if any
                 pub line: Option<usize>,
                 /// Name of the section that was looked up
                 pub section: Vec<u8>,
                 /// Name of the item that was looked up
                 pub item: Vec<u8>,
                 /// Raw bytes of the value that failed to parse
                 pub value: Vec<u8>,
                 /// Human-readable name of the type that was expected, used in the
                 /// error message
                 pub expected_type: &'static str,
             }
             impl fmt::Display for ConfigValueParseError {
                 /// Formats the error as
                 /// `config error: <section>.<item> is not a <type> ('<value>')`.
                 /// Bytes that are not valid UTF-8 are replaced lossily.
                 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                     // TODO: add origin and line number information, here and in
                     // corresponding python code
                     let section = String::from_utf8_lossy(&self.section);
                     let item = String::from_utf8_lossy(&self.item);
                     let value = String::from_utf8_lossy(&self.value);
                     write!(
                         f,
                         "config error: {}.{} is not a {} ('{}')",
                         section, item, self.expected_type, value
                     )
                 }
             }
             impl Config {
                 /// Load system and user configuration from various files.
                 ///
                 /// This is also affected by some environment variables. In particular,
                 /// when `HGRCPATH` is set, the files and directories it lists are
                 /// loaded in place of both the system and the user configuration.
                 pub fn load_non_repo() -> Result<Self, ConfigError> {
                     // (environment variable, config section, config item), in the
                     // order the corresponding layers are added.
                     const ENV_ITEMS: [(&str, &[u8], &[u8]); 5] = [
                         ("EDITOR", b"ui", b"editor"),
                         ("VISUAL", b"ui", b"editor"),
                         ("PAGER", b"pager", b"pager"),
                         // These are set by `run-tests.py --rhg` to enable fallback
                         // for the entire test suite. Alternatives would be setting
                         // configuration through `$HGRCPATH` but some tests override
                         // that, or changing the `hg` shell alias to include
                         // `--config` but that disrupts tests that print command
                         // lines and check expected output.
                         ("RHG_ON_UNSUPPORTED", b"rhg", b"on-unsupported"),
                         (
                             "RHG_FALLBACK_EXECUTABLE",
                             b"rhg",
                             b"fallback-executable",
                         ),
                     ];

                     let mut config = Self { layers: Vec::new() };
                     let rc_path_override = env::var_os("HGRCPATH");
                     // HGRCPATH replaces both system and user config
                     let load_default_files = rc_path_override.is_none();

                     if load_default_files {
                         config.add_system_config()?;
                     }
                     for &(var, section, key) in ENV_ITEMS.iter() {
                         config.add_for_environment_variable(var, section, key);
                     }
                     if load_default_files {
                         config.add_user_config()?;
                     }
                     if let Some(rc_path) = &rc_path_override {
                         // Empty entries of the path list are skipped
                         let entries = env::split_paths(rc_path)
                             .filter(|entry| !entry.as_os_str().is_empty());
                         for entry in entries {
                             if entry.is_dir() {
                                 config.add_trusted_dir(&entry)?;
                             } else {
                                 config.add_trusted_file(&entry)?;
                             }
                         }
                     }
                     Ok(config)
                 }
                 /// Parses `--config`-style command-line arguments and, if that yields
                 /// a layer, pushes it on top of the existing layers.
                 pub fn load_cli_args_config(
                     &mut self,
                     cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
                 ) -> Result<(), ConfigError> {
                     let parsed = ConfigLayer::parse_cli_args(cli_config_args)?;
                     // `parsed` is an `Option`: this adds zero or one layer
                     self.layers.extend(parsed);
                     Ok(())
                 }
                 /// Loads every file with the `rc` extension found directly inside the
                 /// directory `path`, in sorted path order.
                 ///
                 /// Nothing is loaded when listing the directory yields `None` through
                 /// `io_not_found_as_none` (presumably: the directory does not exist).
                 fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> {
                     let entries = match std::fs::read_dir(path)
                         .when_reading_file(path)
                         .io_not_found_as_none()?
                     {
                         Some(entries) => entries,
                         None => return Ok(()),
                     };
                     // List the whole directory before loading anything, so that
                     // files are added in a deterministic order.
                     let mut candidates = Vec::new();
                     for entry in entries {
                         candidates.push(entry.when_reading_file(path)?.path());
                     }
                     candidates.sort();
                     let rc_extension = std::ffi::OsStr::new("rc");
                     for candidate in &candidates {
                         if candidate.extension() == Some(rc_extension) {
                             self.add_trusted_file(candidate)?;
                         }
                     }
                     Ok(())
                 }
                 /// Reads and parses the config file at `path`, appending the resulting
                 /// layer(s) after the existing ones.
                 ///
                 /// Nothing is added when reading yields `None` through
                 /// `io_not_found_as_none` (presumably: the file does not exist).
                 fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> {
                     let contents = std::fs::read(path)
                         .when_reading_file(path)
                         .io_not_found_as_none()?;
                     let data = match contents {
                         Some(data) => data,
                         None => return Ok(()),
                     };
                     let parsed = ConfigLayer::parse(path, &data)?;
                     self.layers.extend(parsed);
                     Ok(())
                 }
                 /// If the environment variable `var` is set, pushes a new layer that
                 /// defines `section.key` with the variable’s value.
                 fn add_for_environment_variable(
                     &mut self,
                     var: &str,
                     section: &[u8],
                     key: &[u8],
                 ) {
                     let value = match env::var_os(var) {
                         Some(value) => value,
                         // Variable not set: no layer to add
                         None => return,
                     };
                     let mut env_layer = ConfigLayer::new(
                         layer::ConfigOrigin::Environment(var.into()),
                     );
                     env_layer.add(
                         section.to_owned(),
                         key.to_owned(),
                         get_bytes_from_os_str(value),
                         None,
                     );
                     self.layers.push(env_layer);
                 }
                 /// Loads `etc/mercurial/hgrc` and `etc/mercurial/hgrc.d/` first under
                 /// the installation prefix (the grandparent directory of the current
                 /// executable, unless that is `/`), then under `/`.
                 #[cfg(unix)] // TODO: other platforms
                 fn add_system_config(&mut self) -> Result<(), ConfigError> {
                     let mut load_prefix = |prefix: &Path| -> Result<(), ConfigError> {
                         let mercurial_etc = prefix.join("etc").join("mercurial");
                         self.add_trusted_file(&mercurial_etc.join("hgrc"))?;
                         self.add_trusted_dir(&mercurial_etc.join("hgrc.d"))
                     };
                     let root = Path::new("/");
                     // TODO: use `std::env::args_os().next().unwrap()` a.k.a. argv[0]
                     // instead? TODO: can this be a relative path?
                     let exe = crate::utils::current_exe()?;
                     // TODO: this order (per-installation then per-system) matches
                     // `systemrcpath()` in `mercurial/scmposix.py`, but
                     // `mercurial/helptext/config.txt` suggests it should be reversed
                     let installation_prefix = exe
                         .parent()
                         .and_then(Path::parent)
                         // `/` is loaded below anyway, do not load it twice
                         .filter(|prefix| *prefix != root);
                     if let Some(prefix) = installation_prefix {
                         load_prefix(prefix)?;
                     }
                     load_prefix(root)
                 }
                 /// Loads `~/.hgrc`, then (except on macOS and iOS) `hg/hgrc` under
                 /// `$XDG_CONFIG_HOME`, falling back to `~/.config` when that variable
                 /// is not set.
                 #[cfg(unix)] // TODO: other platforms
                 fn add_user_config(&mut self) -> Result<(), ConfigError> {
                     let home_dir = home::home_dir();
                     if let Some(home) = &home_dir {
                         self.add_trusted_file(&home.join(".hgrc"))?;
                     }
                     if cfg!(any(target_os = "macos", target_os = "ios")) {
                         // No XDG lookup on Darwin
                         return Ok(());
                     }
                     let xdg_config_home = env::var_os("XDG_CONFIG_HOME")
                         .map(PathBuf::from)
                         .or_else(|| home_dir.map(|home| home.join(".config")));
                     if let Some(config_home) = xdg_config_home {
                         self.add_trusted_file(&config_home.join("hg").join("hgrc"))?;
                     }
                     Ok(())
                 }
                 /// Loads in order, which means that the precedence is the same
                 /// as the order of `sources`.
                 ///
                 /// A file source that cannot be read is silently skipped; a file that
                 /// is read but fails to parse is an error.
                 pub fn load_from_explicit_sources(
                     sources: Vec<ConfigSource>,
                 ) -> Result<Self, ConfigError> {
                     let mut layers = Vec::new();
                     for source in sources {
                         let path = match source {
                             ConfigSource::Parsed(ready) => {
                                 layers.push(ready);
                                 continue;
                             }
                             ConfigSource::AbsPath(path) => path,
                         };
                         // TODO check if it should be trusted
                         // mercurial/ui.py:427
                         //
                         // Read errors are ignored, same as the python code
                         if let Ok(data) = std::fs::read(&path) {
                             layers.extend(ConfigLayer::parse(&path, &data)?);
                         }
                     }
                     Ok(Config { layers })
                 }
                 /// Loads the per-repository config into a new `Config` which is combined
                 /// with `self`.
                 pub(crate) fn combine_with_repo(
                     &self,
                     repo_config_files: &[PathBuf],
                 ) -> Result<Self, ConfigError> {
                     let (cli_layers, other_layers) = self
                         .layers
                         .iter()
                         .cloned()
                         .partition(ConfigLayer::is_from_command_line);
                     let mut repo_config = Self {
                         layers: other_layers,
                     };
                     for path in repo_config_files {
                         // TODO: check if this file should be trusted:
                         // `mercurial/ui.py:427`
                         repo_config.add_trusted_file(path)?;
                     }
                     repo_config.layers.extend(cli_layers);
                     Ok(repo_config)
                 }
                 /// Looks up `item` in `section` and converts its raw bytes with `parse`.
                 ///
                 /// Returns `Ok(None)` when no value is found. When a value is found but
                 /// `parse` returns `None`, returns a `ConfigValueParseError` carrying
                 /// the raw value, where it came from, and `expected_type`.
                 fn get_parse<'config, T: 'config>(
                     &'config self,
                     section: &[u8],
                     item: &[u8],
                     expected_type: &'static str,
                     parse: impl Fn(&'config [u8]) -> Option<T>,
                 ) -> Result<Option<T>, ConfigValueParseError> {
                     let (layer, raw) = match self.get_inner(section, item) {
                         Some(found) => found,
                         None => return Ok(None),
                     };
                     parse(&raw.bytes).map(Some).ok_or_else(|| {
                         ConfigValueParseError {
                             origin: layer.origin.to_owned(),
                             line: raw.line,
                             value: raw.bytes.to_owned(),
                             section: section.to_owned(),
                             item: item.to_owned(),
                             expected_type,
                         }
                     })
                 }
                 /// Returns the first value found for `section.item` as a `&str`.
                 ///
                 /// Gives `Ok(None)` when there is no value, and an `Err` when the value
                 /// is not valid UTF-8.
                 pub fn get_str(
                     &self,
                     section: &[u8],
                     item: &[u8],
                 ) -> Result<Option<&str>, ConfigValueParseError> {
                     let expected = "ASCII or UTF-8 string";
                     self.get_parse(section, item, expected, |raw| {
                         str::from_utf8(raw).ok()
                     })
                 }
                 /// Returns the first value found for `section.item` as a `u32`.
                 ///
                 /// Gives `Ok(None)` when there is no value, and an `Err` when the value
                 /// is not valid UTF-8 or does not parse as an unsigned 32-bit integer.
                 pub fn get_u32(
                     &self,
                     section: &[u8],
                     item: &[u8],
                 ) -> Result<Option<u32>, ConfigValueParseError> {
                     let expected = "valid integer";
                     self.get_parse(section, item, expected, |raw| {
                         str::from_utf8(raw)
                             .ok()
                             .and_then(|text| text.parse().ok())
                     })
                 }
                 /// Returns an `Err` if the first value found is not a valid file size
                 /// value such as `30` (default unit is bytes), `7 MB`, or `42.5 kb`.
                 /// Otherwise, returns an `Ok(value_in_bytes)` if found, or `None`.
                 ///
                 /// Parsing is delegated to `values::parse_byte_size`; the error reports
                 /// the expected type as "byte quantity".
                 pub fn get_byte_size(
                     &self,
                     section: &[u8],
                     item: &[u8],
                 ) -> Result<Option<u64>, ConfigValueParseError> {
                     self.get_parse(section, item, "byte quantity", values::parse_byte_size)
                 }
                 /// Returns an `Err` if the first value found is not a valid boolean.
                 /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if
                 /// found, or `None`.
                 ///
                 /// Parsing is delegated to `values::parse_bool`. Use `get_bool` instead
                 /// when a missing value should simply count as `false`.
                 pub fn get_option(
                     &self,
                     section: &[u8],
                     item: &[u8],
                 ) -> Result<Option<bool>, ConfigValueParseError> {
                     self.get_parse(section, item, "boolean", values::parse_bool)
                 }
                 /// Returns the boolean configured for `section.item`, treating a
                 /// missing value as `false`. Returns an `Err` if a value is present but
                 /// is not a valid boolean.
                 pub fn get_bool(
                     &self,
                     section: &[u8],
                     item: &[u8],
                 ) -> Result<bool, ConfigValueParseError> {
                     self.get_option(section, item)
                         .map(|configured| configured.unwrap_or(false))
                 }
                 /// Returns the corresponding list-value in the config if found, or `None`.
                 ///
                 /// This is appropriate for new configuration keys. The value syntax is
                 /// **not** the same as most existing list-valued config, which has Python
                 /// parsing implemented in `parselist()` in
                 /// `mercurial/utils/stringutil.py`. Faithfully porting that parsing
                 /// algorithm to Rust (including behavior that are arguably bugs)
                 /// turned out to be non-trivial and hasn’t been completed as of this
                 /// writing.
                 ///
                 /// Instead, the "simple" syntax is: split on comma, then trim leading and
                 /// trailing whitespace of each component. Quotes or backslashes are not
                 /// interpreted in any way. Commas are mandatory between values. Values
                 /// that contain a comma are not supported.
                 pub fn get_simple_list(
                     &self,
                     section: &[u8],
                     item: &[u8],
                 ) -> Option<impl Iterator<Item = &[u8]>> {
                     let raw = self.get(section, item)?;
                     let components = raw
                         .split(|&byte| byte == b',')
                         .map(|piece| piece.trim());
                     Some(components)
                 }
+                /// If there is an `item` value in `section`, parse and return a list of
+                /// byte strings.
+                ///
+                /// Returns `None` when no value is set. The raw value is split by
+                /// `values::parse_list`, the port of Python’s `parselist()`, and every
+                /// sub-value is returned as an owned `Vec<u8>`.
+                pub fn get_list(
+                    &self,
+                    section: &[u8],
+                    item: &[u8],
+                ) -> Option<Vec<Vec<u8>>> {
+                    self.get(section, item).map(values::parse_list)
+                }
                 /// Returns the raw value bytes of the first one found, or `None`.
                 ///
                 /// "First" follows the lookup order of `get_inner`.
                 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
                     let (_layer, found) = self.get_inner(section, item)?;
                     Some(found.bytes.as_ref())
                 }
                 /// Returns the layer and the value of the first one found, or `None`.
                 ///
                 /// Layers are searched from last to first, and layers that are not
                 /// trusted are skipped.
                 fn get_inner(
                     &self,
                     section: &[u8],
                     item: &[u8],
                 ) -> Option<(&ConfigLayer, &ConfigValue)> {
                     self.layers
                         .iter()
                         .rev()
                         .filter(|candidate| candidate.trusted)
                         .find_map(|candidate| {
                             candidate
                                 .get(section, item)
                                 .map(|value| (candidate, value))
                         })
                 }
                 /// Return all keys defined for the given section
                 ///
                 /// Keys from every layer are merged into one set, whether or not the
                 /// layer is trusted.
                 pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> {
                     let mut keys = HashSet::new();
                     for layer in &self.layers {
                         keys.extend(layer.iter_keys(section));
                     }
                     keys
                 }
                 /// Get raw values bytes from all layers (even untrusted ones) in order
                 /// of precedence.
                 #[cfg(test)]
                 fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
                     self.layers
                         .iter()
                         .rev()
                         .filter_map(|layer| layer.get(section, item))
                         .map(|value| value.bytes.as_slice())
                         .collect()
                 }
             }
             #[cfg(test)]
             mod tests {
                 use super::*;
                 use pretty_assertions::assert_eq;
                 use std::fs::File;
                 use std::io::Write;
                 /// Checks that a value set after an `%include` directive overrides both
                 /// the included file and the value set before the directive, and
                 /// exercises the typed getters on valid and invalid values.
                 #[test]
                 fn test_include_layer_ordering() {
                     let tmpdir = tempfile::tempdir().unwrap();
                     let tmpdir_path = tmpdir.path();
                     // File pulled in by the `%include` line of `base.rc` below
                     let mut included_file =
                         File::create(&tmpdir_path.join("included.rc")).unwrap();
                     included_file.write_all(b"[section]\nitem=value1").unwrap();
                     let base_config_path = tmpdir_path.join("base.rc");
                     let mut config_file = File::create(&base_config_path).unwrap();
                     // `item` is set on line 2, included on line 3, and set again on
                     // line 4 of this file.
                     let data =
                         b"[section]\nitem=value0\n%include included.rc\nitem=value2\n\
                           [section2]\ncount = 4\nsize = 1.5 KB\nnot-count = 1.5\nnot-size = 1 ub";
                     config_file.write_all(data).unwrap();
                     let sources = vec![ConfigSource::AbsPath(base_config_path)];
                     let config = Config::load_from_explicit_sources(sources)
                         .expect("expected valid config");
                     // The assignment after the include wins
                     let (_, value) = config.get_inner(b"section", b"item").unwrap();
                     assert_eq!(
                         value,
                         &ConfigValue {
                             bytes: b"value2".to_vec(),
                             line: Some(4)
                         }
                     );
                     let value = config.get(b"section", b"item").unwrap();
                     assert_eq!(value, b"value2",);
                     // All three assignments are still visible, highest precedence
                     // first
                     assert_eq!(
                         config.get_all(b"section", b"item"),
                         [b"value2", b"value1", b"value0"]
                     );
                     assert_eq!(config.get_u32(b"section2", b"count").unwrap(), Some(4));
                     assert_eq!(
                         config.get_byte_size(b"section2", b"size").unwrap(),
                         Some(1024 + 512)
                     );
                     // Values that do not match the requested type are errors
                     assert!(config.get_u32(b"section2", b"not-count").is_err());
                     assert!(config.get_byte_size(b"section2", b"not-size").is_err());
                 }
             }

rust/hg-core/src/config/values.rs

0 +212 0

@@ -1,61 +1,273 b''
1	//! Parsing functions for various type of configuration values.	1	//! Parsing functions for various type of configuration values.
2	//!	2	//!
3	//! Returning `None` indicates a syntax error. Using a `Result` would be more	3	//! Returning `None` indicates a syntax error. Using a `Result` would be more
4	//! correct but would take more boilerplate for converting between error types,	4	//! correct but would take more boilerplate for converting between error types,
5	//! compared to using `.ok()` on inner results of various error types to	5	//! compared to using `.ok()` on inner results of various error types to
6	//! convert them all to options. The `Config::get_parse` method later converts	6	//! convert them all to options. The `Config::get_parse` method later converts
7	//! those options to results with `ConfigValueParseError`, which contains	7	//! those options to results with `ConfigValueParseError`, which contains
8	//! details about where the value came from (but omits details of what’s	8	//! details about where the value came from (but omits details of what’s
9	//! invalid inside the value).	9	//! invalid inside the value).
10		10
		11	use crate::utils::SliceExt;
		12
11	pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {	13	pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {
12	match v.to_ascii_lowercase().as_slice() {	14	match v.to_ascii_lowercase().as_slice() {
13	b"1" \| b"yes" \| b"true" \| b"on" \| b"always" => Some(true),	15	b"1" \| b"yes" \| b"true" \| b"on" \| b"always" => Some(true),
14	b"0" \| b"no" \| b"false" \| b"off" \| b"never" => Some(false),	16	b"0" \| b"no" \| b"false" \| b"off" \| b"never" => Some(false),
15	_ => None,	17	_ => None,
16	}	18	}
17	}	19	}
18		20
19	pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> {	21	pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> {
20	let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase();	22	let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase();
21	const UNITS: &[(&str, u64)] = &[	23	const UNITS: &[(&str, u64)] = &[
22	("g", 1 << 30),	24	("g", 1 << 30),
23	("gb", 1 << 30),	25	("gb", 1 << 30),
24	("m", 1 << 20),	26	("m", 1 << 20),
25	("mb", 1 << 20),	27	("mb", 1 << 20),
26	("k", 1 << 10),	28	("k", 1 << 10),
27	("kb", 1 << 10),	29	("kb", 1 << 10),
28	("b", 1 << 0), // Needs to be last	30	("b", 1 << 0), // Needs to be last
29	];	31	];
30	for &(unit, multiplier) in UNITS {	32	for &(unit, multiplier) in UNITS {
31	// TODO: use `value.strip_suffix(unit)` when we require Rust 1.45+	33	// TODO: use `value.strip_suffix(unit)` when we require Rust 1.45+
32	if value.ends_with(unit) {	34	if value.ends_with(unit) {
33	let value_before_unit = &value[..value.len() - unit.len()];	35	let value_before_unit = &value[..value.len() - unit.len()];
34	let float: f64 = value_before_unit.trim().parse().ok()?;	36	let float: f64 = value_before_unit.trim().parse().ok()?;
35	if float >= 0.0 {	37	if float >= 0.0 {
36	return Some((float * multiplier as f64).round() as u64);	38	return Some((float * multiplier as f64).round() as u64);
37	} else {	39	} else {
38	return None;	40	return None;
39	}	41	}
40	}	42	}
41	}	43	}
42	value.parse().ok()	44	value.parse().ok()
43	}	45	}
44		46
		47	/// Parse a config value as a list of sub-values.
		48	///
		49	/// Ported from `parselist` in `mercurial/utils/stringutil.py`
		50
		51	// Note: keep behavior in sync with the Python one.
		52
		53	// Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when
		54	// possible (when there’s no backslash-escapes) but this is probably not worth
		55	// the complexity as config is presumably not accessed inside
		56	// performance-sensitive loops.
		57	pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {
		58	// Port of Python’s `value.lstrip(b' ,\n')`
		59	// TODO: is this really what we want?
		60	let input =
		61	input.trim_start_matches(\|b\| b == b' ' \|\| b == b',' \|\| b == b'\n');
		62	parse_list_without_trim_start(input)
		63	}
		64
		65	fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {
		66	// Start of port of Python’s `_configlist`
		67	let input = input.trim_end_matches(\|b\| b == b' ' \|\| b == b',');
		68	if input.is_empty() {
		69	return Vec::new();
		70	}
		71
		72	// Just to make “a string” less confusable with “a list of strings”.
		73	type ByteString = Vec<u8>;
		74
		75	// These correspond to Python’s…
		76	let mut mode = ParserMode::Plain; // `parser`
		77	let mut values = Vec::new(); // `parts[:-1]`
		78	let mut next_value = ByteString::new(); // `parts[-1]`
		79	let mut offset = 0; // `offset`
		80
		81	// Setting `parser` to `None` is instead handled by returning immediately
		82	enum ParserMode {
		83	Plain,
		84	Quoted,
		85	}
		86
		87	loop {
		88	match mode {
		89	ParserMode::Plain => {
		90	// Start of port of Python’s `_parse_plain`
		91	let mut whitespace = false;
		92	while let Some(&byte) = input.get(offset) {
		93	if is_space(byte) \|\| byte == b',' {
		94	whitespace = true;
		95	offset += 1;
		96	} else {
		97	break;
		98	}
		99	}
		100	if let Some(&byte) = input.get(offset) {
		101	if whitespace {
		102	values.push(std::mem::take(&mut next_value))
		103	}
		104	if byte == b'"' && next_value.is_empty() {
		105	mode = ParserMode::Quoted;
		106	} else {
		107	if byte == b'"' && next_value.ends_with(b"\\") {
		108	next_value.pop();
		109	}
		110	next_value.push(byte);
		111	}
		112	offset += 1;
		113	} else {
		114	values.push(next_value);
		115	return values;
		116	}
		117	}
		118	ParserMode::Quoted => {
		119	// Start of port of Python’s `_parse_quote`
		120	if let Some(&byte) = input.get(offset) {
		121	if byte == b'"' {
		122	// The input contains a quoted zero-length value `""`
		123	debug_assert_eq!(next_value, b"");
		124	values.push(std::mem::take(&mut next_value));
		125	offset += 1;
		126	while let Some(&byte) = input.get(offset) {
		127	if is_space(byte) \|\| byte == b',' {
		128	offset += 1;
		129	} else {
		130	break;
		131	}
		132	}
		133	mode = ParserMode::Plain;
		134	continue;
		135	}
		136	}
		137
		138	while let Some(&byte) = input.get(offset) {
		139	if byte == b'"' {
		140	break;
		141	}
		142	if byte == b'\\' && input.get(offset + 1) == Some(&b'"') {
		143	next_value.push(b'"');
		144	offset += 2;
		145	} else {
		146	next_value.push(byte);
		147	offset += 1;
		148	}
		149	}
		150
		151	if offset >= input.len() {
		152	// We didn’t find a closing double-quote,
		153	// so treat the opening one as part of an unquoted value
		154	// instead of delimiting the start of a quoted value.
		155
		156	// `next_value` may have had some backslash-escapes
		157	// unescaped. TODO: shouldn’t we use a slice of `input`
		158	// instead?
		159	let mut real_values =
		160	parse_list_without_trim_start(&next_value);
		161
		162	if let Some(first) = real_values.first_mut() {
		163	first.insert(0, b'"');
		164	// Drop `next_value`
		165	values.extend(real_values)
		166	} else {
		167	next_value.push(b'"');
		168	values.push(next_value);
		169	}
		170	return values;
		171	}
		172
		173	// We’re not at the end of the input, which means the `while`
		174	// loop above ended at a double quote. Skip
		175	// over that.
		176	offset += 1;
		177
		178	while let Some(&byte) = input.get(offset) {
		179	if byte == b' ' \|\| byte == b',' {
		180	offset += 1;
		181	} else {
		182	break;
		183	}
		184	}
		185
		186	if offset >= input.len() {
		187	values.push(next_value);
		188	return values;
		189	}
		190
		191	if offset + 1 == input.len() && input[offset] == b'"' {
		192	next_value.push(b'"');
		193	offset += 1;
		194	} else {
		195	values.push(std::mem::take(&mut next_value));
		196	}
		197
		198	mode = ParserMode::Plain;
		199	}
		200	}
		201	}
		202
		203	// https://docs.python.org/3/library/stdtypes.html?#bytes.isspace
		/// Returns `true` if `byte` is ASCII whitespace in the sense of Python's
		/// `bytes.isspace`: space, tab, newline, carriage return, vertical tab
		/// (`0x0b`) or form feed (`0x0c`). Every other byte yields `false`.
		fn is_space(byte: u8) -> bool {
		    match byte {
		        b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c' => true,
		        _ => false,
		    }
		}
		211	}
		212
		/// Exercises `parse_list` on the edge cases of the list micro-syntax
		/// (separators, whitespace, quoting and backslash-escaped quotes).
		#[test]
		fn test_parse_list() {
		    // Convert the byte values to `String`s so that `assert_eq!` failure
		    // messages show readable text rather than lists of integers.
		    fn as_strings(values: &[Vec<u8>]) -> Vec<String> {
		        values
		            .iter()
		            .map(|v| {
		                std::str::from_utf8(v)
		                    .expect("test values must be valid UTF-8")
		                    .to_owned()
		            })
		            .collect()
		    }
		    // `assert_parse_list!(input => [expected, ...])` checks that parsing
		    // `input` yields exactly the expected values, in order.
		    macro_rules! assert_parse_list {
		        ( $input: expr => [ $( $output: expr ),* ] ) => {
		            assert_eq!(
		                as_strings(&parse_list($input)),
		                as_strings(&[ $( Vec::from(&$output[..]) ),* ]),
		            );
		        }
		    }

		    // Keep these Rust tests in sync with the Python ones in
		    // `tests/test-config-parselist.py`
		    assert_parse_list!(b"" => []);
		    assert_parse_list!(b"," => []);
		    assert_parse_list!(b"A" => [b"A"]);
		    assert_parse_list!(b"B,B" => [b"B", b"B"]);
		    assert_parse_list!(b", C, ,C," => [b"C", b"C"]);
		    assert_parse_list!(b"\"" => [b"\""]);
		    assert_parse_list!(b"\"\"" => [b"", b""]);
		    assert_parse_list!(b"D,\"" => [b"D", b"\""]);
		    assert_parse_list!(b"E,\"\"" => [b"E", b"", b""]);
		    assert_parse_list!(b"\"F,F\"" => [b"F,F"]);
		    assert_parse_list!(b"\"G,G" => [b"\"G", b"G"]);
		    assert_parse_list!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]);
		    assert_parse_list!(b"I,I\"" => [b"I", b"I\""]);
		    assert_parse_list!(b"J,\"J" => [b"J", b"\"J"]);
		    assert_parse_list!(b"K K" => [b"K", b"K"]);
		    assert_parse_list!(b"\"K\" K" => [b"K", b"K"]);
		    assert_parse_list!(b"L\tL" => [b"L", b"L"]);
		    assert_parse_list!(b"\"L\"\tL" => [b"L", b"", b"L"]);
		    assert_parse_list!(b"M\x0bM" => [b"M", b"M"]);
		    assert_parse_list!(b"\"M\"\x0bM" => [b"M", b"", b"M"]);
		    assert_parse_list!(b"\"N\" , ,\"" => [b"N\""]);
		    assert_parse_list!(b"\" ,O, " => [b"\"", b"O"]);
		}
		256
/// Checks `parse_byte_size` against inputs with and without a unit suffix,
/// fractional quantities, and inputs that must be rejected.
#[test]
fn test_parse_byte_size() {
    // Inputs that contain no number are rejected.
    assert_eq!(parse_byte_size(b""), None);
    assert_eq!(parse_byte_size(b"b"), None);

    // A bare number, and numbers followed by a unit with or without a space.
    assert_eq!(parse_byte_size(b"12"), Some(12));
    assert_eq!(parse_byte_size(b"12b"), Some(12));
    assert_eq!(parse_byte_size(b"12 b"), Some(12));
    // Fractional quantities come back as whole byte counts.
    assert_eq!(parse_byte_size(b"12.1 b"), Some(12));
    assert_eq!(parse_byte_size(b"1.1 K"), Some(1126));
    assert_eq!(parse_byte_size(b"1.1 kB"), Some(1126));

    // Negative quantities are rejected; small positive ones yield zero.
    assert_eq!(parse_byte_size(b"-12 b"), None);
    assert_eq!(parse_byte_size(b"-0.1 b"), None);
    assert_eq!(parse_byte_size(b"0.1 b"), Some(0));
    assert_eq!(parse_byte_size(b"12.1 b"), Some(12));
}

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages