values.rs
267 lines
| 9.8 KiB
| application/rls-services+xml
|
RustLexer
Simon Sapin
|
//! Parsing functions for various types of configuration values.
//!
//! Returning `None` indicates a syntax error. Using a `Result` would be more
//! correct but would take more boilerplate for converting between error types,
//! compared to using `.ok()` on inner results of various error types to
//! convert them all to options. The `Config::get_parse` method later converts
//! those options to results with `ConfigValueParseError`, which contains
//! details about where the value came from (but omits details of what’s
//! invalid inside the value).
Simon Sapin
|
r48762 | use crate::utils::SliceExt; | ||
Simon Sapin
|
r47344 | pub(super) fn parse_bool(v: &[u8]) -> Option<bool> { | ||
match v.to_ascii_lowercase().as_slice() { | ||||
b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true), | ||||
b"0" | b"no" | b"false" | b"off" | b"never" => Some(false), | ||||
_ => None, | ||||
} | ||||
} | ||||
pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> { | ||||
let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase(); | ||||
const UNITS: &[(&str, u64)] = &[ | ||||
("g", 1 << 30), | ||||
("gb", 1 << 30), | ||||
("m", 1 << 20), | ||||
("mb", 1 << 20), | ||||
("k", 1 << 10), | ||||
("kb", 1 << 10), | ||||
("b", 1 << 0), // Needs to be last | ||||
]; | ||||
for &(unit, multiplier) in UNITS { | ||||
Raphaël Gomès
|
r50529 | if let Some(value) = value.strip_suffix(unit) { | ||
let float: f64 = value.trim().parse().ok()?; | ||||
Simon Sapin
|
r47344 | if float >= 0.0 { | ||
return Some((float * multiplier as f64).round() as u64); | ||||
} else { | ||||
return None; | ||||
} | ||||
} | ||||
} | ||||
value.parse().ok() | ||||
} | ||||
Simon Sapin
|
r47345 | |||
Simon Sapin
|
r48762 | /// Parse a config value as a list of sub-values. | ||
/// | ||||
/// Ported from `parselist` in `mercurial/utils/stringutil.py` | ||||
// Note: keep behavior in sync with the Python one. | ||||
// Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when | ||||
// possible (when there’s no backslash-escapes) but this is probably not worth | ||||
// the complexity as config is presumably not accessed inside | ||||
// preformance-sensitive loops. | ||||
pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> { | ||||
// Port of Python’s `value.lstrip(b' ,\n')` | ||||
// TODO: is this really what we want? | ||||
let input = | ||||
input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n'); | ||||
parse_list_without_trim_start(input) | ||||
} | ||||
fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> { | ||||
// Start of port of Python’s `_configlist` | ||||
let input = input.trim_end_matches(|b| b == b' ' || b == b','); | ||||
if input.is_empty() { | ||||
return Vec::new(); | ||||
} | ||||
// Just to make “a string” less confusable with “a list of strings”. | ||||
type ByteString = Vec<u8>; | ||||
// These correspond to Python’s… | ||||
let mut mode = ParserMode::Plain; // `parser` | ||||
let mut values = Vec::new(); // `parts[:-1]` | ||||
let mut next_value = ByteString::new(); // `parts[-1]` | ||||
let mut offset = 0; // `offset` | ||||
// Setting `parser` to `None` is instead handled by returning immediately | ||||
enum ParserMode { | ||||
Plain, | ||||
Quoted, | ||||
} | ||||
loop { | ||||
match mode { | ||||
ParserMode::Plain => { | ||||
// Start of port of Python’s `_parse_plain` | ||||
let mut whitespace = false; | ||||
while let Some(&byte) = input.get(offset) { | ||||
if is_space(byte) || byte == b',' { | ||||
whitespace = true; | ||||
offset += 1; | ||||
} else { | ||||
break; | ||||
} | ||||
} | ||||
if let Some(&byte) = input.get(offset) { | ||||
if whitespace { | ||||
values.push(std::mem::take(&mut next_value)) | ||||
} | ||||
if byte == b'"' && next_value.is_empty() { | ||||
mode = ParserMode::Quoted; | ||||
} else { | ||||
if byte == b'"' && next_value.ends_with(b"\\") { | ||||
next_value.pop(); | ||||
} | ||||
next_value.push(byte); | ||||
} | ||||
offset += 1; | ||||
} else { | ||||
values.push(next_value); | ||||
return values; | ||||
} | ||||
} | ||||
ParserMode::Quoted => { | ||||
// Start of port of Python’s `_parse_quote` | ||||
if let Some(&byte) = input.get(offset) { | ||||
if byte == b'"' { | ||||
// The input contains a quoted zero-length value `""` | ||||
debug_assert_eq!(next_value, b""); | ||||
values.push(std::mem::take(&mut next_value)); | ||||
offset += 1; | ||||
while let Some(&byte) = input.get(offset) { | ||||
if is_space(byte) || byte == b',' { | ||||
offset += 1; | ||||
} else { | ||||
break; | ||||
} | ||||
} | ||||
mode = ParserMode::Plain; | ||||
continue; | ||||
} | ||||
} | ||||
while let Some(&byte) = input.get(offset) { | ||||
if byte == b'"' { | ||||
break; | ||||
} | ||||
if byte == b'\\' && input.get(offset + 1) == Some(&b'"') { | ||||
next_value.push(b'"'); | ||||
offset += 2; | ||||
} else { | ||||
next_value.push(byte); | ||||
offset += 1; | ||||
} | ||||
} | ||||
if offset >= input.len() { | ||||
// We didn’t find a closing double-quote, | ||||
// so treat the opening one as part of an unquoted value | ||||
// instead of delimiting the start of a quoted value. | ||||
// `next_value` may have had some backslash-escapes | ||||
// unescaped. TODO: shouldn’t we use a slice of `input` | ||||
// instead? | ||||
let mut real_values = | ||||
parse_list_without_trim_start(&next_value); | ||||
if let Some(first) = real_values.first_mut() { | ||||
first.insert(0, b'"'); | ||||
// Drop `next_value` | ||||
values.extend(real_values) | ||||
} else { | ||||
next_value.push(b'"'); | ||||
values.push(next_value); | ||||
} | ||||
return values; | ||||
} | ||||
// We’re not at the end of the input, which means the `while` | ||||
// loop above ended at at double quote. Skip | ||||
// over that. | ||||
offset += 1; | ||||
while let Some(&byte) = input.get(offset) { | ||||
if byte == b' ' || byte == b',' { | ||||
offset += 1; | ||||
} else { | ||||
break; | ||||
} | ||||
} | ||||
if offset >= input.len() { | ||||
values.push(next_value); | ||||
return values; | ||||
} | ||||
if offset + 1 == input.len() && input[offset] == b'"' { | ||||
next_value.push(b'"'); | ||||
offset += 1; | ||||
} else { | ||||
values.push(std::mem::take(&mut next_value)); | ||||
} | ||||
mode = ParserMode::Plain; | ||||
} | ||||
} | ||||
} | ||||
// https://docs.python.org/3/library/stdtypes.html?#bytes.isspace | ||||
fn is_space(byte: u8) -> bool { | ||||
Raphaël Gomès
|
r50825 | matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c') | ||
Simon Sapin
|
r48762 | } | ||
} | ||||
#[test]
fn test_parse_list() {
    /// Parse `input` and compare the items, rendered as strings so that
    /// assertion failures are easy to read, with `expected`.
    fn check(input: &[u8], expected: &[&str]) {
        let parsed: Vec<String> = parse_list(input)
            .into_iter()
            .map(|item| String::from_utf8(item).unwrap())
            .collect();
        assert_eq!(parsed, expected);
    }

    // Keep these Rust tests in sync with the Python ones in
    // `tests/test-config-parselist.py`
    check(b"", &[]);
    check(b",", &[]);
    check(b"A", &["A"]);
    check(b"B,B", &["B", "B"]);
    check(b", C, ,C,", &["C", "C"]);
    check(b"\"", &["\""]);
    check(b"\"\"", &["", ""]);
    check(b"D,\"", &["D", "\""]);
    check(b"E,\"\"", &["E", "", ""]);
    check(b"\"F,F\"", &["F,F"]);
    check(b"\"G,G", &["\"G", "G"]);
    check(b"\"H \\\",\\\"H", &["\"H", ",", "H"]);
    check(b"I,I\"", &["I", "I\""]);
    check(b"J,\"J", &["J", "\"J"]);
    check(b"K K", &["K", "K"]);
    check(b"\"K\" K", &["K", "K"]);
    check(b"L\tL", &["L", "L"]);
    check(b"\"L\"\tL", &["L", "", "L"]);
    check(b"M\x0bM", &["M", "M"]);
    check(b"\"M\"\x0bM", &["M", "", "M"]);
    check(b"\"N\" , ,\"", &["N\""]);
    check(b"\" ,O, ", &["\"", "O"]);
}
Simon Sapin
|
#[test]
fn test_parse_byte_size() {
    // (config value, expected number of bytes)
    const CASES: &[(&str, Option<u64>)] = &[
        ("", None),
        ("b", None),
        ("12", Some(12)),
        ("12b", Some(12)),
        ("12 b", Some(12)),
        ("12.1 b", Some(12)),
        ("1.1 K", Some(1126)),
        ("1.1 kB", Some(1126)),
        ("-12 b", None),
        ("-0.1 b", None),
        ("0.1 b", Some(0)),
    ];
    for &(input, expected) in CASES {
        assert_eq!(
            parse_byte_size(input.as_bytes()),
            expected,
            "input: {:?}",
            input
        );
    }
}