//! Parsing functions for various type of configuration values. //! //! Returning `None` indicates a syntax error. Using a `Result` would be more //! correct but would take more boilerplate for converting between error types, //! compared to using `.ok()` on inner results of various error types to //! convert them all to options. The `Config::get_parse` method later converts //! those options to results with `ConfigValueParseError`, which contains //! details about where the value came from (but omits details of what’s //! invalid inside the value). use crate::utils::SliceExt; pub(super) fn parse_bool(v: &[u8]) -> Option<bool> { match v.to_ascii_lowercase().as_slice() { b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true), b"0" | b"no" | b"false" | b"off" | b"never" => Some(false), _ => None, } } pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> { let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase(); const UNITS: &[(&str, u64)] = &[ ("g", 1 << 30), ("gb", 1 << 30), ("m", 1 << 20), ("mb", 1 << 20), ("k", 1 << 10), ("kb", 1 << 10), ("b", 1 << 0), // Needs to be last ]; for &(unit, multiplier) in UNITS { // TODO: use `value.strip_suffix(unit)` when we require Rust 1.45+ if value.ends_with(unit) { let value_before_unit = &value[..value.len() - unit.len()]; let float: f64 = value_before_unit.trim().parse().ok()?; if float >= 0.0 { return Some((float * multiplier as f64).round() as u64); } else { return None; } } } value.parse().ok() } /// Parse a config value as a list of sub-values. /// /// Ported from `parselist` in `mercurial/utils/stringutil.py` // Note: keep behavior in sync with the Python one. // Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when // possible (when there’s no backslash-escapes) but this is probably not worth // the complexity as config is presumably not accessed inside // preformance-sensitive loops. 
pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {
    // Entry point: drop leading separators, then hand the rest over to
    // `parse_list_without_trim_start`.
    // Port of Python’s `value.lstrip(b' ,\n')`
    // TODO: is this really what we want?
    let input =
        input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n');
    parse_list_without_trim_start(input)
}

/// Split `input` into a list of byte strings, without first stripping
/// leading separators.
///
/// Values are separated by runs of whitespace and/or commas. A value that
/// starts with a double quote may contain separators and runs until the
/// closing double quote; `\"` stands for a literal double quote. Returns an
/// empty list when nothing is left once trailing spaces and commas are
/// removed.
///
/// This function calls itself to re-parse the content that follows an
/// opening double quote which is never closed.
fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {
    // Start of port of Python’s `_configlist`
    let input = input.trim_end_matches(|b| b == b' ' || b == b',');
    if input.is_empty() {
        return Vec::new();
    }

    // Just to make “a string” less confusable with “a list of strings”.
    type ByteString = Vec<u8>;

    // These correspond to Python’s…
    let mut mode = ParserMode::Plain; // `parser`
    let mut values = Vec::new(); // `parts[:-1]`
    let mut next_value = ByteString::new(); // `parts[-1]`
    let mut offset = 0; // `offset`

    // Setting `parser` to `None` is instead handled by returning immediately
    enum ParserMode {
        // Outside of double quotes
        Plain,
        // Just after an opening double quote
        Quoted,
    }

    // Each iteration runs one step of the current mode. The loop is only
    // left through the `return` statements below.
    loop {
        match mode {
            ParserMode::Plain => {
                // Start of port of Python’s `_parse_plain`
                // Skip a run of separators, remembering whether there was
                // any.
                let mut whitespace = false;
                while let Some(&byte) = input.get(offset) {
                    if is_space(byte) || byte == b',' {
                        whitespace = true;
                        offset += 1;
                    } else {
                        break;
                    }
                }
                if let Some(&byte) = input.get(offset) {
                    // Separators were skipped, so the value accumulated so
                    // far is complete.
                    if whitespace {
                        values.push(std::mem::take(&mut next_value))
                    }
                    if byte == b'"' && next_value.is_empty() {
                        // A double quote at the very start of a value opens
                        // a quoted value.
                        mode = ParserMode::Quoted;
                    } else {
                        // In the middle of a plain value, `\"` is unescaped
                        // to `"` by dropping the backslash pushed just
                        // before.
                        if byte == b'"' && next_value.ends_with(b"\\") {
                            next_value.pop();
                        }
                        next_value.push(byte);
                    }
                    offset += 1;
                } else {
                    // End of input: the pending value is the last one.
                    values.push(next_value);
                    return values;
                }
            }
            ParserMode::Quoted => {
                // Start of port of Python’s `_parse_quote`
                if let Some(&byte) = input.get(offset) {
                    if byte == b'"' {
                        // The input contains a quoted zero-length value `""`
                        debug_assert_eq!(next_value, b"");
                        values.push(std::mem::take(&mut next_value));
                        offset += 1;
                        // Skip the separators that follow `""`.
                        while let Some(&byte) = input.get(offset) {
                            if is_space(byte) || byte == b',' {
                                offset += 1;
                            } else {
                                break;
                            }
                        }
                        mode = ParserMode::Plain;
                        continue;
                    }
                }

                // Accumulate bytes up to the closing double quote (or the
                // end of the input), turning `\"` into `"`.
                while let Some(&byte) = input.get(offset) {
                    if byte == b'"' {
                        break;
                    }
                    if byte == b'\\'
                        && input.get(offset + 1) == Some(&b'"')
                    {
                        next_value.push(b'"');
                        offset += 2;
                    } else {
                        next_value.push(byte);
                        offset += 1;
                    }
                }

                if offset >= input.len() {
                    // We didn’t find a closing double-quote,
                    // so treat the opening one as part of an unquoted value
                    // instead of delimiting the start of a quoted value.

                    // `next_value` may have had some backslash-escapes
                    // unescaped. TODO: shouldn’t we use a slice of `input`
                    // instead?
                    let mut real_values =
                        parse_list_without_trim_start(&next_value);

                    if let Some(first) = real_values.first_mut() {
                        // Put the opening double quote back in front of the
                        // first re-parsed value.
                        first.insert(0, b'"');
                        // Drop `next_value`
                        values.extend(real_values)
                    } else {
                        // Nothing came out of the re-parse: the value is the
                        // double quote itself.
                        next_value.push(b'"');
                        values.push(next_value);
                    }
                    return values;
                }

                // We’re not at the end of the input, which means the `while`
                // loop above ended at a double quote. Skip
                // over that.
                offset += 1;

                // Note: only spaces and commas are skipped here, not every
                // byte accepted by `is_space`.
                while let Some(&byte) = input.get(offset) {
                    if byte == b' ' || byte == b',' {
                        offset += 1;
                    } else {
                        break;
                    }
                }

                if offset >= input.len() {
                    // The quoted value was the last one.
                    values.push(next_value);
                    return values;
                }

                if offset + 1 == input.len() && input[offset] == b'"' {
                    // A lone double quote as the very last byte is appended
                    // to the value that was just closed.
                    next_value.push(b'"');
                    offset += 1;
                } else {
                    values.push(std::mem::take(&mut next_value));
                }

                mode = ParserMode::Plain;
            }
        }
    }

    // Whether `byte` is one of the ASCII whitespace bytes listed at:
    // https://docs.python.org/3/library/stdtypes.html?#bytes.isspace
    fn is_space(byte: u8) -> bool {
        if let b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c' = byte {
            true
        } else {
            false
        }
    }
}

// Checks `parse_list` against a table of inputs and expected lists.
#[test]
fn test_parse_list() {
    // Make `assert_eq` error messages nicer
    fn as_strings(values: &[Vec<u8>]) -> Vec<String> {
        values
            .iter()
            .map(|v| std::str::from_utf8(v.as_ref()).unwrap().to_owned())
            .collect()
    }
    // `assert_parse_list!(input => [expected, …])` asserts that parsing
    // `input` yields exactly the `expected` values, in order.
    macro_rules! assert_parse_list {
        ( $input: expr => [ $( $output: expr ),* ] ) => {
            assert_eq!(
                as_strings(&parse_list($input)),
                as_strings(&[ $( Vec::from(&$output[..]) ),* ]),
            );
        }
    }

    // Keep these Rust tests in sync with the Python ones in
    // `tests/test-config-parselist.py`
    assert_parse_list!(b"" => []);
    assert_parse_list!(b"," => []);
    assert_parse_list!(b"A" => [b"A"]);
    assert_parse_list!(b"B,B" => [b"B", b"B"]);
    assert_parse_list!(b", C, ,C," => [b"C", b"C"]);
    assert_parse_list!(b"\"" => [b"\""]);
    assert_parse_list!(b"\"\"" => [b"", b""]);
    assert_parse_list!(b"D,\"" => [b"D", b"\""]);
    assert_parse_list!(b"E,\"\"" => [b"E", b"", b""]);
    assert_parse_list!(b"\"F,F\"" => [b"F,F"]);
    assert_parse_list!(b"\"G,G" => [b"\"G", b"G"]);
    assert_parse_list!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]);
    assert_parse_list!(b"I,I\"" => [b"I", b"I\""]);
    assert_parse_list!(b"J,\"J" => [b"J", b"\"J"]);
    assert_parse_list!(b"K K" => [b"K", b"K"]);
    assert_parse_list!(b"\"K\" K" => [b"K", b"K"]);
    assert_parse_list!(b"L\tL" => [b"L", b"L"]);
    assert_parse_list!(b"\"L\"\tL" => [b"L", b"", b"L"]);
    assert_parse_list!(b"M\x0bM" => [b"M", b"M"]);
    assert_parse_list!(b"\"M\"\x0bM" => [b"M", b"", b"M"]);
    assert_parse_list!(b"\"N\" , ,\"" => [b"N\""]);
    assert_parse_list!(b"\" ,O, " => [b"\"", b"O"]);
}

// Checks `parse_byte_size` on plain integers, values with units, fractional
// values (rounded) and negative values (rejected).
#[test]
fn test_parse_byte_size() {
    assert_eq!(parse_byte_size(b""), None);
    assert_eq!(parse_byte_size(b"b"), None);
    assert_eq!(parse_byte_size(b"12"), Some(12));
    assert_eq!(parse_byte_size(b"12b"), Some(12));
    assert_eq!(parse_byte_size(b"12 b"), Some(12));
    assert_eq!(parse_byte_size(b"12.1 b"), Some(12));
    assert_eq!(parse_byte_size(b"1.1 K"), Some(1126));
    assert_eq!(parse_byte_size(b"1.1 kB"), Some(1126));
    assert_eq!(parse_byte_size(b"-12 b"), None);
    assert_eq!(parse_byte_size(b"-0.1 b"), None);
    assert_eq!(parse_byte_size(b"0.1 b"), Some(0));
    assert_eq!(parse_byte_size(b"12.1 b"), Some(12));
}