upstream/mercurial-mirror Files · rust/hg-core/src/config/values.rs

tests: disable a section of `test-hgrc.t` that may hit a zeroconf bug...

tests: disable a section of `test-hgrc.t` that may hit a zeroconf bug This effectively re-disables the same test as cce9e7d2fb92, but unconditionally because it's not a pyoxidizer-specific problem (see below and 997c9b2069d1). I can run the test locally fine, with the same venv as CI is using, and have had multiple CI runs that don't hit this. But one failed with this: --- /private/tmp/mercurial-ci/tests/test-hgrc.t +++ /private/tmp/mercurial-ci/tests/test-hgrc.t.err @@ -305,5 +305,17 @@ [255] $ HGRCSKIPREPO=1 hg paths --config extensions.zeroconf= + Traceback (most recent call last): + File "/private/tmp/hgtests.7idf706t/install/lib/python/hgext/zeroconf/Zeroconf.py", line 966, in run + self.readers[sock].handle_read() + File "/private/tmp/hgtests.7idf706t/install/lib/python/hgext/zeroconf/Zeroconf.py", line 1020, in handle_read + msg = DNSIncoming(data) + File "/private/tmp/hgtests.7idf706t/install/lib/python/hgext/zeroconf/Zeroconf.py", line 537, in __init__ + self.readOthers() + File "/private/tmp/hgtests.7idf706t/install/lib/python/hgext/zeroconf/Zeroconf.py", line 650, in readOthers + self.readCharacterString(), + File "/private/tmp/hgtests.7idf706t/install/lib/python/hgext/zeroconf/Zeroconf.py", line 584, in readCharacterString + length = ord(self.data[self.offset]) + TypeError: ord() expected string of length 1, but int found foo = $TESTTMP/bar The zeroconf extension has bytes vs str problems that are obvious from inspection alone, and nobody has complained, so I'm not going to let this block getting CI for macOS up and running. Given that it's in the packet read code, I suspect that this 1) requires something on the network to speak mDNS, and 2) it is a timing issue if this is seen or not. (The bytes vs str issue itself is real, but only happen if a response is received quickly.)

Raphaël Gomès - - Load All Authors

File last commit:

r50825:e98fd81b default


                r53048:74e16d8c

stable

Download file

             values.rs
        
                    267 lines
            
             | 9.8 KiB
            
                | application/rls-services+xml
            
             |
                RustLexer
            
             / rust / hg-core / src / config / values.rs
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      //! Parsing functions for various type of configuration values.

      //!

      //! Returning `None` indicates a syntax error. Using a `Result` would be more

      //! correct but would take more boilerplate for converting between error types,

      //! compared to using `.ok()` on inner results of various error types to

      //! convert them all to options. The `Config::get_parse` method later converts

      //! those options to results with `ConfigValueParseError`, which contains

      //! details about where the value came from (but omits details of what’s

      //! invalid inside the value).

      use crate::utils::SliceExt;

      pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {

          match v.to_ascii_lowercase().as_slice() {

              b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true),

              b"0" | b"no" | b"false" | b"off" | b"never" => Some(false),

              _ => None,

          }

      }

      pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> {

          let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase();

          const UNITS: &[(&str, u64)] = &[

              ("g", 1 << 30),

              ("gb", 1 << 30),

              ("m", 1 << 20),

              ("mb", 1 << 20),

              ("k", 1 << 10),

              ("kb", 1 << 10),

              ("b", 1 << 0), // Needs to be last

          ];

          for &(unit, multiplier) in UNITS {

              if let Some(value) = value.strip_suffix(unit) {

                  let float: f64 = value.trim().parse().ok()?;

                  if float >= 0.0 {

                      return Some((float * multiplier as f64).round() as u64);

                  } else {

                      return None;

                  }

              }

          }

          value.parse().ok()

      }

      /// Parse a config value as a list of sub-values.

      ///

      /// Ported from `parselist` in `mercurial/utils/stringutil.py`

      // Note: keep behavior in sync with the Python one.

      // Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when

      // possible (when there’s no backslash-escapes) but this is probably not worth

      // the complexity as config is presumably not accessed inside

      // preformance-sensitive loops.

      pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {

          // Port of Python’s `value.lstrip(b' ,\n')`

          // TODO: is this really what we want?

          let input =

              input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n');

          parse_list_without_trim_start(input)

      }

      fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {

          // Start of port of Python’s `_configlist`

          let input = input.trim_end_matches(|b| b == b' ' || b == b',');

          if input.is_empty() {

              return Vec::new();

          }

          // Just to make “a string” less confusable with “a list of strings”.

          type ByteString = Vec<u8>;

          // These correspond to Python’s…

          let mut mode = ParserMode::Plain; // `parser`

          let mut values = Vec::new(); // `parts[:-1]`

          let mut next_value = ByteString::new(); // `parts[-1]`

          let mut offset = 0; // `offset`

          // Setting `parser` to `None` is instead handled by returning immediately

          enum ParserMode {

              Plain,

              Quoted,

          }

          loop {

              match mode {

                  ParserMode::Plain => {

                      // Start of port of Python’s `_parse_plain`

                      let mut whitespace = false;

                      while let Some(&byte) = input.get(offset) {

                          if is_space(byte) || byte == b',' {

                              whitespace = true;

                              offset += 1;

                          } else {

                              break;

                          }

                      }

                      if let Some(&byte) = input.get(offset) {

                          if whitespace {

                              values.push(std::mem::take(&mut next_value))

                          }

                          if byte == b'"' && next_value.is_empty() {

                              mode = ParserMode::Quoted;

                          } else {

                              if byte == b'"' && next_value.ends_with(b"\\") {

                                  next_value.pop();

                              }

                              next_value.push(byte);

                          }

                          offset += 1;

                      } else {

                          values.push(next_value);

                          return values;

                      }

                  }

                  ParserMode::Quoted => {

                      // Start of port of Python’s `_parse_quote`

                      if let Some(&byte) = input.get(offset) {

                          if byte == b'"' {

                              // The input contains a quoted zero-length value `""`

                              debug_assert_eq!(next_value, b"");

                              values.push(std::mem::take(&mut next_value));

                              offset += 1;

                              while let Some(&byte) = input.get(offset) {

                                  if is_space(byte) || byte == b',' {

                                      offset += 1;

                                  } else {

                                      break;

                                  }

                              }

                              mode = ParserMode::Plain;

                              continue;

                          }

                      }

                      while let Some(&byte) = input.get(offset) {

                          if byte == b'"' {

                              break;

                          }

                          if byte == b'\\' && input.get(offset + 1) == Some(&b'"') {

                              next_value.push(b'"');

                              offset += 2;

                          } else {

                              next_value.push(byte);

                              offset += 1;

                          }

                      }

                      if offset >= input.len() {

                          // We didn’t find a closing double-quote,

                          // so treat the opening one as part of an unquoted value

                          // instead of delimiting the start of a quoted value.

                          // `next_value` may have had some backslash-escapes

                          // unescaped. TODO: shouldn’t we use a slice of `input`

                          // instead?

                          let mut real_values =

                              parse_list_without_trim_start(&next_value);

                          if let Some(first) = real_values.first_mut() {

                              first.insert(0, b'"');

                              // Drop `next_value`

                              values.extend(real_values)

                          } else {

                              next_value.push(b'"');

                              values.push(next_value);

                          }

                          return values;

                      }

                      // We’re not at the end of the input, which means the `while`

                      // loop above ended at at double quote. Skip

                      // over that.

                      offset += 1;

                      while let Some(&byte) = input.get(offset) {

                          if byte == b' ' || byte == b',' {

                              offset += 1;

                          } else {

                              break;

                          }

                      }

                      if offset >= input.len() {

                          values.push(next_value);

                          return values;

                      }

                      if offset + 1 == input.len() && input[offset] == b'"' {

                          next_value.push(b'"');

                          offset += 1;

                      } else {

                          values.push(std::mem::take(&mut next_value));

                      }

                      mode = ParserMode::Plain;

                  }

              }

          }

          // https://docs.python.org/3/library/stdtypes.html?#bytes.isspace

          fn is_space(byte: u8) -> bool {

              matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c')

          }

      }

      #[test]

      fn test_parse_list() {

          // Make `assert_eq` error messages nicer

          fn as_strings(values: &[Vec<u8>]) -> Vec<String> {

              values

                  .iter()

                  .map(|v| std::str::from_utf8(v.as_ref()).unwrap().to_owned())

                  .collect()

          }

          macro_rules! assert_parse_list {

              ( $input: expr => [ $( $output: expr ),* ] ) => {

                  assert_eq!(

                      as_strings(&parse_list($input)),

                      as_strings(&[ $( Vec::from(&$output[..]) ),* ]),

                  );

              }

          }

          // Keep these Rust tests in sync with the Python ones in

          // `tests/test-config-parselist.py`

          assert_parse_list!(b"" => []);

          assert_parse_list!(b"," => []);

          assert_parse_list!(b"A" => [b"A"]);

          assert_parse_list!(b"B,B" => [b"B", b"B"]);

          assert_parse_list!(b", C, ,C," => [b"C", b"C"]);

          assert_parse_list!(b"\"" => [b"\""]);

          assert_parse_list!(b"\"\"" => [b"", b""]);

          assert_parse_list!(b"D,\"" => [b"D", b"\""]);

          assert_parse_list!(b"E,\"\"" => [b"E", b"", b""]);

          assert_parse_list!(b"\"F,F\"" => [b"F,F"]);

          assert_parse_list!(b"\"G,G" => [b"\"G", b"G"]);

          assert_parse_list!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]);

          assert_parse_list!(b"I,I\"" => [b"I", b"I\""]);

          assert_parse_list!(b"J,\"J" => [b"J", b"\"J"]);

          assert_parse_list!(b"K K" => [b"K", b"K"]);

          assert_parse_list!(b"\"K\" K" => [b"K", b"K"]);

          assert_parse_list!(b"L\tL" => [b"L", b"L"]);

          assert_parse_list!(b"\"L\"\tL" => [b"L", b"", b"L"]);

          assert_parse_list!(b"M\x0bM" => [b"M", b"M"]);

          assert_parse_list!(b"\"M\"\x0bM" => [b"M", b"", b"M"]);

          assert_parse_list!(b"\"N\"  , ,\"" => [b"N\""]);

          assert_parse_list!(b"\" ,O,  " => [b"\"", b"O"]);

      }

      #[test]

      fn test_parse_byte_size() {

          assert_eq!(parse_byte_size(b""), None);

          assert_eq!(parse_byte_size(b"b"), None);

          assert_eq!(parse_byte_size(b"12"), Some(12));

          assert_eq!(parse_byte_size(b"12b"), Some(12));

          assert_eq!(parse_byte_size(b"12 b"), Some(12));

          assert_eq!(parse_byte_size(b"12.1 b"), Some(12));

          assert_eq!(parse_byte_size(b"1.1 K"), Some(1126));

          assert_eq!(parse_byte_size(b"1.1 kB"), Some(1126));

          assert_eq!(parse_byte_size(b"-12 b"), None);

          assert_eq!(parse_byte_size(b"-0.1 b"), None);

          assert_eq!(parse_byte_size(b"0.1 b"), Some(0));

          assert_eq!(parse_byte_size(b"12.1 b"), Some(12));

      }

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				//! Parsing functions for various type of configuration values.
				//!
				//! Returning `None` indicates a syntax error. Using a `Result` would be more
				//! correct but would take more boilerplate for converting between error types,
				//! compared to using `.ok()` on inner results of various error types to
				//! convert them all to options. The `Config::get_parse` method later converts
				//! those options to results with `ConfigValueParseError`, which contains
				//! details about where the value came from (but omits details of what’s
				//! invalid inside the value).

				use crate::utils::SliceExt;

				pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {
				match v.to_ascii_lowercase().as_slice() {
				b"1" \| b"yes" \| b"true" \| b"on" \| b"always" => Some(true),
				b"0" \| b"no" \| b"false" \| b"off" \| b"never" => Some(false),
				_ => None,
				}
				}

				pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> {
				let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase();
				const UNITS: &[(&str, u64)] = &[
				("g", 1 << 30),
				("gb", 1 << 30),
				("m", 1 << 20),
				("mb", 1 << 20),
				("k", 1 << 10),
				("kb", 1 << 10),
				("b", 1 << 0), // Needs to be last
				];
				for &(unit, multiplier) in UNITS {
				if let Some(value) = value.strip_suffix(unit) {
				let float: f64 = value.trim().parse().ok()?;
				if float >= 0.0 {
				return Some((float * multiplier as f64).round() as u64);
				} else {
				return None;
				}
				}
				}
				value.parse().ok()
				}

				/// Parse a config value as a list of sub-values.
				///
				/// Ported from `parselist` in `mercurial/utils/stringutil.py`

				// Note: keep behavior in sync with the Python one.

				// Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when
				// possible (when there’s no backslash-escapes) but this is probably not worth
				// the complexity as config is presumably not accessed inside
				// preformance-sensitive loops.
				pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {
				// Port of Python’s `value.lstrip(b' ,\n')`
				// TODO: is this really what we want?
				let input =
				input.trim_start_matches(\|b\| b == b' ' \|\| b == b',' \|\| b == b'\n');
				parse_list_without_trim_start(input)
				}

				fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {
				// Start of port of Python’s `_configlist`
				let input = input.trim_end_matches(\|b\| b == b' ' \|\| b == b',');
				if input.is_empty() {
				return Vec::new();
				}

				// Just to make “a string” less confusable with “a list of strings”.
				type ByteString = Vec<u8>;

				// These correspond to Python’s…
				let mut mode = ParserMode::Plain; // `parser`
				let mut values = Vec::new(); // `parts[:-1]`
				let mut next_value = ByteString::new(); // `parts[-1]`
				let mut offset = 0; // `offset`

				// Setting `parser` to `None` is instead handled by returning immediately
				enum ParserMode {
				Plain,
				Quoted,
				}

				loop {
				match mode {
				ParserMode::Plain => {
				// Start of port of Python’s `_parse_plain`
				let mut whitespace = false;
				while let Some(&byte) = input.get(offset) {
				if is_space(byte) \|\| byte == b',' {
				whitespace = true;
				offset += 1;
				} else {
				break;
				}
				}
				if let Some(&byte) = input.get(offset) {
				if whitespace {
				values.push(std::mem::take(&mut next_value))
				}
				if byte == b'"' && next_value.is_empty() {
				mode = ParserMode::Quoted;
				} else {
				if byte == b'"' && next_value.ends_with(b"\\") {
				next_value.pop();
				}
				next_value.push(byte);
				}
				offset += 1;
				} else {
				values.push(next_value);
				return values;
				}
				}
				ParserMode::Quoted => {
				// Start of port of Python’s `_parse_quote`
				if let Some(&byte) = input.get(offset) {
				if byte == b'"' {
				// The input contains a quoted zero-length value `""`
				debug_assert_eq!(next_value, b"");
				values.push(std::mem::take(&mut next_value));
				offset += 1;
				while let Some(&byte) = input.get(offset) {
				if is_space(byte) \|\| byte == b',' {
				offset += 1;
				} else {
				break;
				}
				}
				mode = ParserMode::Plain;
				continue;
				}
				}

				while let Some(&byte) = input.get(offset) {
				if byte == b'"' {
				break;
				}
				if byte == b'\\' && input.get(offset + 1) == Some(&b'"') {
				next_value.push(b'"');
				offset += 2;
				} else {
				next_value.push(byte);
				offset += 1;
				}
				}

				if offset >= input.len() {
				// We didn’t find a closing double-quote,
				// so treat the opening one as part of an unquoted value
				// instead of delimiting the start of a quoted value.

				// `next_value` may have had some backslash-escapes
				// unescaped. TODO: shouldn’t we use a slice of `input`
				// instead?
				let mut real_values =
				parse_list_without_trim_start(&next_value);

				if let Some(first) = real_values.first_mut() {
				first.insert(0, b'"');
				// Drop `next_value`
				values.extend(real_values)
				} else {
				next_value.push(b'"');
				values.push(next_value);
				}
				return values;
				}

				// We’re not at the end of the input, which means the `while`
				// loop above ended at at double quote. Skip
				// over that.
				offset += 1;

				while let Some(&byte) = input.get(offset) {
				if byte == b' ' \|\| byte == b',' {
				offset += 1;
				} else {
				break;
				}
				}

				if offset >= input.len() {
				values.push(next_value);
				return values;
				}

				if offset + 1 == input.len() && input[offset] == b'"' {
				next_value.push(b'"');
				offset += 1;
				} else {
				values.push(std::mem::take(&mut next_value));
				}

				mode = ParserMode::Plain;
				}
				}
				}

				// https://docs.python.org/3/library/stdtypes.html?#bytes.isspace
				fn is_space(byte: u8) -> bool {
				matches!(byte, b' ' \| b'\t' \| b'\n' \| b'\r' \| b'\x0b' \| b'\x0c')
				}
				}

				#[test]
				fn test_parse_list() {
				// Make `assert_eq` error messages nicer
				fn as_strings(values: &[Vec<u8>]) -> Vec<String> {
				values
				.iter()
				.map(\|v\| std::str::from_utf8(v.as_ref()).unwrap().to_owned())
				.collect()
				}
				macro_rules! assert_parse_list {
				( $input: expr => [ $( $output: expr ),* ] ) => {
				assert_eq!(
				as_strings(&parse_list($input)),
				as_strings(&[ $( Vec::from(&$output[..]) ),* ]),
				);
				}
				}

				// Keep these Rust tests in sync with the Python ones in
				// `tests/test-config-parselist.py`
				assert_parse_list!(b"" => []);
				assert_parse_list!(b"," => []);
				assert_parse_list!(b"A" => [b"A"]);
				assert_parse_list!(b"B,B" => [b"B", b"B"]);
				assert_parse_list!(b", C, ,C," => [b"C", b"C"]);
				assert_parse_list!(b"\"" => [b"\""]);
				assert_parse_list!(b"\"\"" => [b"", b""]);
				assert_parse_list!(b"D,\"" => [b"D", b"\""]);
				assert_parse_list!(b"E,\"\"" => [b"E", b"", b""]);
				assert_parse_list!(b"\"F,F\"" => [b"F,F"]);
				assert_parse_list!(b"\"G,G" => [b"\"G", b"G"]);
				assert_parse_list!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]);
				assert_parse_list!(b"I,I\"" => [b"I", b"I\""]);
				assert_parse_list!(b"J,\"J" => [b"J", b"\"J"]);
				assert_parse_list!(b"K K" => [b"K", b"K"]);
				assert_parse_list!(b"\"K\" K" => [b"K", b"K"]);
				assert_parse_list!(b"L\tL" => [b"L", b"L"]);
				assert_parse_list!(b"\"L\"\tL" => [b"L", b"", b"L"]);
				assert_parse_list!(b"M\x0bM" => [b"M", b"M"]);
				assert_parse_list!(b"\"M\"\x0bM" => [b"M", b"", b"M"]);
				assert_parse_list!(b"\"N\" , ,\"" => [b"N\""]);
				assert_parse_list!(b"\" ,O, " => [b"\"", b"O"]);
				}

				#[test]
				fn test_parse_byte_size() {
				assert_eq!(parse_byte_size(b""), None);
				assert_eq!(parse_byte_size(b"b"), None);

				assert_eq!(parse_byte_size(b"12"), Some(12));
				assert_eq!(parse_byte_size(b"12b"), Some(12));
				assert_eq!(parse_byte_size(b"12 b"), Some(12));
				assert_eq!(parse_byte_size(b"12.1 b"), Some(12));
				assert_eq!(parse_byte_size(b"1.1 K"), Some(1126));
				assert_eq!(parse_byte_size(b"1.1 kB"), Some(1126));

				assert_eq!(parse_byte_size(b"-12 b"), None);
				assert_eq!(parse_byte_size(b"-0.1 b"), None);
				assert_eq!(parse_byte_size(b"0.1 b"), Some(0));
				assert_eq!(parse_byte_size(b"12.1 b"), Some(12));
				}