##// END OF EJS Templates
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
dirstate-v2: Add storage space for nanoseconds precision in file mtimes For now the sub-second component is always set to zero for tracked files and symlinks. (The mtime of directories for the `readdir`-skipping optimization is a different code path and already uses the full precision available.) This extra storage uses the space previously freed by replacing the 32-bit `mode` field by two bits in the existing `flags` field, so the overall size of nodes is unchanged. (This space had been left as padding for this purpose.) Also move things around in the node layout and documentation to have less duplication. Now that they have the same representation, directory mtime and file mtime are kept in the same field. (Only either one can exist for a given node.) Differential Revision: https://phab.mercurial-scm.org/D11655

File last commit:

r48762:6961eca0 default
r49033:308d9c24 default
Show More
values.rs
273 lines | 10.0 KiB | application/rls-services+xml | RustLexer
//! Parsing functions for various types of configuration values.
//!
//! Returning `None` indicates a syntax error. Using a `Result` would be more
//! correct but would take more boilerplate for converting between error types,
//! compared to using `.ok()` on inner results of various error types to
//! convert them all to options. The `Config::get_parse` method later converts
//! those options to results with `ConfigValueParseError`, which contains
//! details about where the value came from (but omits details of what’s
//! invalid inside the value).
use crate::utils::SliceExt;
/// Parse a config value as a boolean.
///
/// The comparison ignores ASCII case. `1`, `yes`, `true`, `on` and `always`
/// are true; `0`, `no`, `false`, `off` and `never` are false. Anything else
/// (including the empty value) is a syntax error and yields `None`.
pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {
    const TRUE_WORDS: [&[u8]; 5] = [b"1", b"yes", b"true", b"on", b"always"];
    const FALSE_WORDS: [&[u8]; 5] = [b"0", b"no", b"false", b"off", b"never"];

    if TRUE_WORDS.iter().any(|&word| v.eq_ignore_ascii_case(word)) {
        return Some(true);
    }
    if FALSE_WORDS.iter().any(|&word| v.eq_ignore_ascii_case(word)) {
        return Some(false);
    }
    None
}
/// Parse a config value as a size in bytes.
///
/// The value is a non-negative number, optionally followed by a
/// case-insensitive unit: `b`, `k`/`kb`, `m`/`mb` or `g`/`gb` (powers of
/// 1024). With a unit the number may be fractional and the result is rounded
/// to the nearest byte; without a unit it must be an unsigned integer.
/// Whitespace around the value, and between the number and its unit, is
/// ignored.
///
/// Returns `None` on a syntax error: invalid UTF-8, a number that cannot be
/// parsed, a negative or NaN number, or a size that does not fit in a `u64`.
pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> {
    // 2^64, the smallest `f64` that is too large for a `u64`. Checking
    // against it explicitly rejects infinities and oversized values instead
    // of relying on the float-to-integer cast, which saturates silently (and
    // was undefined behavior for out-of-range values before Rust 1.45).
    const U64_RANGE_END: f64 = 18_446_744_073_709_551_616.0;
    const UNITS: &[(&str, u64)] = &[
        ("g", 1 << 30),
        ("gb", 1 << 30),
        ("m", 1 << 20),
        ("mb", 1 << 20),
        ("k", 1 << 10),
        ("kb", 1 << 10),
        ("b", 1 << 0), // Needs to be last
    ];

    // Trim up front so that the unit-less path tolerates surrounding
    // whitespace just like the path with a unit does.
    let value = std::str::from_utf8(value)
        .ok()?
        .trim()
        .to_ascii_lowercase();

    for &(unit, multiplier) in UNITS {
        // TODO: use `value.strip_suffix(unit)` when we require Rust 1.45+
        if value.ends_with(unit) {
            let value_before_unit = &value[..value.len() - unit.len()];
            let float: f64 = value_before_unit.trim().parse().ok()?;
            let bytes = (float * multiplier as f64).round();
            // NaN fails the first comparison, infinity the second.
            if float >= 0.0 && bytes < U64_RANGE_END {
                return Some(bytes as u64);
            }
            return None;
        }
    }
    // No unit: only a plain unsigned integer is accepted.
    value.parse().ok()
}
/// Split a config value into the list of sub-values it contains.
///
/// This is a port of `parselist` from `mercurial/utils/stringutil.py`.
// Note: the behavior must be kept in sync with the Python implementation.
// Note: returning `Vec<Cow<[u8]>>` would allow borrowing from `input` whenever
// there are no backslash-escapes, but that is probably not worth the
// complexity since config is presumably not accessed inside
// performance-sensitive loops.
pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {
    // Equivalent of Python’s `value.lstrip(b' ,\n')`
    // TODO: is this really what we want?
    let is_leading_separator = |byte: u8| match byte {
        b' ' | b',' | b'\n' => true,
        _ => false,
    };
    let without_leading_separators =
        input.trim_start_matches(is_leading_separator);
    parse_list_without_trim_start(without_leading_separators)
}
/// Split `input` into list items, without first stripping leading separators
/// (`parse_list` does that before calling this function).
///
/// Items are separated by runs of whitespace and/or commas. An item that
/// starts with a double quote is "quoted": it runs up to the next unescaped
/// double quote and may contain separators, and `\"` inside it stands for a
/// literal double quote. When the closing quote is missing, the opening quote
/// is kept as an ordinary byte and the bytes collected so far are parsed
/// again as unquoted items.
///
/// Returns an empty `Vec` when nothing is left of `input` once its trailing
/// spaces and commas have been trimmed.
fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {
// Start of port of Python’s `_configlist`
let input = input.trim_end_matches(|b| b == b' ' || b == b',');
if input.is_empty() {
return Vec::new();
}
// Just to make “a string” less confusable with “a list of strings”.
type ByteString = Vec<u8>;
// These correspond to Python’s…
let mut mode = ParserMode::Plain; // `parser`
let mut values = Vec::new(); // `parts[:-1]`
let mut next_value = ByteString::new(); // `parts[-1]`
let mut offset = 0; // `offset`
// Setting `parser` to `None` is instead handled by returning immediately
enum ParserMode {
Plain,
Quoted,
}
// Each iteration handles one step of the current mode. The only ways out of
// the loop are the `return values` statements below.
loop {
match mode {
ParserMode::Plain => {
// Start of port of Python’s `_parse_plain`
// Skip a run of separators (whitespace or commas), remembering
// whether there was at least one.
let mut whitespace = false;
while let Some(&byte) = input.get(offset) {
if is_space(byte) || byte == b',' {
whitespace = true;
offset += 1;
} else {
break;
}
}
if let Some(&byte) = input.get(offset) {
// Separators before this byte mean the item accumulated so far
// is complete; `take` leaves an empty `next_value` behind.
if whitespace {
values.push(std::mem::take(&mut next_value))
}
// A double quote only opens a quoted item when it is the first
// byte of that item.
if byte == b'"' && next_value.is_empty() {
mode = ParserMode::Quoted;
} else {
// `\"` in unquoted text: drop the backslash that was already
// pushed and keep only the quote.
if byte == b'"' && next_value.ends_with(b"\\") {
next_value.pop();
}
next_value.push(byte);
}
offset += 1;
} else {
// End of input: whatever was accumulated is the last item.
values.push(next_value);
return values;
}
}
ParserMode::Quoted => {
// Start of port of Python’s `_parse_quote`
// `offset` now points just past the opening double quote.
if let Some(&byte) = input.get(offset) {
if byte == b'"' {
// The input contains a quoted zero-length value `""`
debug_assert_eq!(next_value, b"");
values.push(std::mem::take(&mut next_value));
offset += 1;
// Skip the separators that follow the empty quoted item, then
// resume plain parsing.
while let Some(&byte) = input.get(offset) {
if is_space(byte) || byte == b',' {
offset += 1;
} else {
break;
}
}
mode = ParserMode::Plain;
continue;
}
}
// Collect bytes up to the closing double quote (or the end of the
// input), turning each `\"` pair into a single `"`.
while let Some(&byte) = input.get(offset) {
if byte == b'"' {
break;
}
if byte == b'\\' && input.get(offset + 1) == Some(&b'"') {
next_value.push(b'"');
offset += 2;
} else {
next_value.push(byte);
offset += 1;
}
}
if offset >= input.len() {
// We didn’t find a closing double-quote,
// so treat the opening one as part of an unquoted value
// instead of delimiting the start of a quoted value.
// `next_value` may have had some backslash-escapes
// unescaped. TODO: shouldn’t we use a slice of `input`
// instead?
let mut real_values =
parse_list_without_trim_start(&next_value);
if let Some(first) = real_values.first_mut() {
// Put the opening quote back in front of the first re-parsed
// item.
first.insert(0, b'"');
// Drop `next_value`
values.extend(real_values)
} else {
// Re-parsing produced no item at all: the quote is appended to
// `next_value`, which becomes the final item.
next_value.push(b'"');
values.push(next_value);
}
return values;
}
// We’re not at the end of the input, which means the `while`
// loop above ended at a double quote. Skip
// over that.
offset += 1;
// Note: unlike the other separator-skipping loops in this function,
// this one only skips spaces and commas, not every byte accepted by
// `is_space`.
while let Some(&byte) = input.get(offset) {
if byte == b' ' || byte == b',' {
offset += 1;
} else {
break;
}
}
if offset >= input.len() {
// Nothing follows the quoted item: it is the last one.
values.push(next_value);
return values;
}
if offset + 1 == input.len() && input[offset] == b'"' {
// The only thing left is one trailing double quote: it is appended
// to the current item instead of starting a new one.
next_value.push(b'"');
offset += 1;
} else {
// More input follows: the quoted item is complete.
values.push(std::mem::take(&mut next_value));
}
mode = ParserMode::Plain;
}
}
}
// https://docs.python.org/3/library/stdtypes.html?#bytes.isspace
/// Whether `byte` is one of the ASCII whitespace bytes: space, tab, line
/// feed, carriage return, vertical tab (0x0b) or form feed (0x0c).
fn is_space(byte: u8) -> bool {
if let b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c' = byte {
true
} else {
false
}
}
}
#[test]
fn test_parse_list() {
    // Render byte strings as text so that failed assertions are readable.
    fn as_strings(values: &[Vec<u8>]) -> Vec<String> {
        let mut rendered = Vec::with_capacity(values.len());
        for value in values {
            let text = std::str::from_utf8(value).unwrap();
            rendered.push(text.to_string());
        }
        rendered
    }

    // `assert_parse_list!(input => [items…])` checks that parsing `input`
    // produces exactly the listed items.
    macro_rules! assert_parse_list {
        ( $input: expr => [ $( $output: expr ),* ] ) => {{
            let expected: Vec<Vec<u8>> = vec![ $( $output.to_vec() ),* ];
            let parsed = parse_list($input);
            assert_eq!(as_strings(&parsed), as_strings(&expected));
        }};
    }

    // Keep these Rust tests in sync with the Python ones in
    // `tests/test-config-parselist.py`
    assert_parse_list!(b"" => []);
    assert_parse_list!(b"," => []);
    assert_parse_list!(b"A" => [b"A"]);
    assert_parse_list!(b"B,B" => [b"B", b"B"]);
    assert_parse_list!(b", C, ,C," => [b"C", b"C"]);
    assert_parse_list!(b"\"" => [b"\""]);
    assert_parse_list!(b"\"\"" => [b"", b""]);
    assert_parse_list!(b"D,\"" => [b"D", b"\""]);
    assert_parse_list!(b"E,\"\"" => [b"E", b"", b""]);
    assert_parse_list!(b"\"F,F\"" => [b"F,F"]);
    assert_parse_list!(b"\"G,G" => [b"\"G", b"G"]);
    assert_parse_list!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]);
    assert_parse_list!(b"I,I\"" => [b"I", b"I\""]);
    assert_parse_list!(b"J,\"J" => [b"J", b"\"J"]);
    assert_parse_list!(b"K K" => [b"K", b"K"]);
    assert_parse_list!(b"\"K\" K" => [b"K", b"K"]);
    assert_parse_list!(b"L\tL" => [b"L", b"L"]);
    assert_parse_list!(b"\"L\"\tL" => [b"L", b"", b"L"]);
    assert_parse_list!(b"M\x0bM" => [b"M", b"M"]);
    assert_parse_list!(b"\"M\"\x0bM" => [b"M", b"", b"M"]);
    assert_parse_list!(b"\"N\" , ,\"" => [b"N\""]);
    assert_parse_list!(b"\" ,O, " => [b"\"", b"O"]);
}
#[test]
fn test_parse_byte_size() {
    // Compare the parsed size of `input` with `expected`.
    fn check(input: &[u8], expected: Option<u64>) {
        assert_eq!(parse_byte_size(input), expected);
    }

    // Missing number
    check(b"", None);
    check(b"b", None);
    // Plain integers and byte units
    check(b"12", Some(12));
    check(b"12b", Some(12));
    check(b"12 b", Some(12));
    check(b"12.1 b", Some(12));
    // Fractional values with larger, case-insensitive units
    check(b"1.1 K", Some(1126));
    check(b"1.1 kB", Some(1126));
    // Negative sizes are rejected
    check(b"-12 b", None);
    check(b"-0.1 b", None);
    // Rounding to the nearest byte
    check(b"0.1 b", Some(0));
    check(b"12.1 b", Some(12));
}