upstream/mercurial-mirror Commit - r48762:6961eca0

rhg: Port Python’s `ui.configlist` as `Config::get_list`...

Simon Sapin -

r48762:6961eca0 default

parent child

tests/test-config-parselist.py

0 created 644 +52 0

			@@ -0,0 +1,52 b''
		1	"""
		2	List-valued configuration keys have an ad-hoc microsyntax. From `hg help config`:
		3
		4	> List values are separated by whitespace or comma, except when values are
		5	> placed in double quotation marks:
		6	>
		7	> allow_read = "John Doe, PhD", brian, betty
		8	>
		9	> Quotation marks can be escaped by prefixing them with a backslash. Only
		10	> quotation marks at the beginning of a word is counted as a quotation
		11	> (e.g., ``foo"bar baz`` is the list of ``foo"bar`` and ``baz``).
		12
		13	That help documentation is fairly light on details, the actual parser has many
		14	other edge cases. This test tries to cover them.
		15	"""
		16
		17	from mercurial.utils import stringutil
		18
		19
		20	def assert_parselist(input, expected):
			# Parse `input` with `stringutil.parselist` and raise an AssertionError
			# showing the input, the actual result and the expected result when
			# the two differ.
		21	result = stringutil.parselist(input)
		22	if result != expected:
		23	raise AssertionError(
		24	"parse_input(%r)\n got %r\nexpected %r"
		25	% (input, result, expected)
		26	)
		27
		28
		29	# Keep these Python tests in sync with the Rust ones in `rust/hg-core/src/config/values.rs`
		30
			# Separators: empty input, commas, and runs of commas and spaces.
		31	assert_parselist(b'', [])
		32	assert_parselist(b',', [])
		33	assert_parselist(b'A', [b'A'])
		34	assert_parselist(b'B,B', [b'B', b'B'])
		35	assert_parselist(b', C, ,C,', [b'C', b'C'])
			# Double quotes: a quote without a closing quote is kept as a literal
			# character, and an empty quoted value `""` yields two empty items.
		36	assert_parselist(b'"', [b'"'])
		37	assert_parselist(b'""', [b'', b''])
		38	assert_parselist(b'D,"', [b'D', b'"'])
		39	assert_parselist(b'E,""', [b'E', b'', b''])
		40	assert_parselist(b'"F,F"', [b'F,F'])
		41	assert_parselist(b'"G,G', [b'"G', b'G'])
		42	assert_parselist(b'"H \\",\\"H', [b'"H', b',', b'H'])
		43	assert_parselist(b'I,I"', [b'I', b'I"'])
		44	assert_parselist(b'J,"J', [b'J', b'"J'])
			# Whitespace: `\x0b` is a vertical tab. As the expected values show, a
			# tab or vertical tab directly after a closing quote produces an extra
			# empty item, whereas a space does not.
		45	assert_parselist(b'K K', [b'K', b'K'])
		46	assert_parselist(b'"K" K', [b'K', b'K'])
		47	assert_parselist(b'L\tL', [b'L', b'L'])
		48	assert_parselist(b'"L"\tL', [b'L', b'', b'L'])
		49	assert_parselist(b'M\x0bM', [b'M', b'M'])
		50	assert_parselist(b'"M"\x0bM', [b'M', b'', b'M'])
		51	assert_parselist(b'"N" , ,"', [b'N"'])
		52	assert_parselist(b'" ,O, ', [b'"', b'O'])

rust/hg-core/src/config/config.rs

0 +10 0

                      })
                  }
+                 /// If there is an `item` value in `section`, parse and return a list of
+                 /// byte strings.
+                 ///
+                 /// The raw value is looked up with `self.get`; when one is found it is
+                 /// parsed with `values::parse_list`. Returns `None` when `self.get`
+                 /// returns `None`.
+                 pub fn get_list(
+                     &self,
+                     section: &[u8],
+                     item: &[u8],
+                 ) -> Option<Vec<Vec<u8>>> {
+                     self.get(section, item).map(values::parse_list)
+                 }
                  /// Returns the raw value bytes of the first one found, or `None`.
                  pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
                      self.get_inner(section, item)

rust/hg-core/src/config/values.rs

0 +212 0

		@@ -8,6 +8,8 b''
8	8	//! details about where the value came from (but omits details of what’s
9	9	//! invalid inside the value).
10	10
	11	use crate::utils::SliceExt;
	12
11	13	pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {
12	14	match v.to_ascii_lowercase().as_slice() {
13	15	b"1" \| b"yes" \| b"true" \| b"on" \| b"always" => Some(true),
		@@ -42,6 +44,216 b' pub(super) fn parse_byte_size(value: &[u'
42	44	value.parse().ok()
43	45	}
44	46
	47	/// Parse a config value as a list of sub-values.
	48	///
	49	/// Ported from `parselist` in `mercurial/utils/stringutil.py`
		///
		/// Sub-values are separated by whitespace or commas. A value wrapped in
		/// double quotes may contain separators, and `\"` stands for a literal
		/// double quote. Leading spaces, commas and newlines are stripped before
		/// the rest is handed to `parse_list_without_trim_start`.
	50
	51	// Note: keep behavior in sync with the Python one.
	52
	53	// Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when
	54	// possible (when there’s no backslash-escapes) but this is probably not worth
	55	// the complexity as config is presumably not accessed inside
	56	// performance-sensitive loops.
	57	pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {
	58	// Port of Python’s `value.lstrip(b' ,\n')`
	59	// TODO: is this really what we want?
	60	let input =
	61	input.trim_start_matches(\|b\| b == b' ' \|\| b == b',' \|\| b == b'\n');
	62	parse_list_without_trim_start(input)
	63	}
	64
	65	fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {
		// Splits `input` into values separated by whitespace or commas, with
		// support for double-quoted values. Unlike `parse_list`, nothing is
		// stripped from the start of `input`; trailing spaces and commas are.
		// This function calls itself for the unterminated-quote case below.
	66	// Start of port of Python’s `_configlist`
	67	let input = input.trim_end_matches(\|b\| b == b' ' \|\| b == b',');
	68	if input.is_empty() {
	69	return Vec::new();
	70	}
	71
	72	// Just to make “a string” less confusable with “a list of strings”.
	73	type ByteString = Vec<u8>;
	74
	75	// These correspond to Python’s…
	76	let mut mode = ParserMode::Plain; // `parser`
	77	let mut values = Vec::new(); // `parts[:-1]`
	78	let mut next_value = ByteString::new(); // `parts[-1]`
	79	let mut offset = 0; // `offset`
	80
	81	// Setting `parser` to `None` is instead handled by returning immediately
	82	enum ParserMode {
	83	Plain,
	84	Quoted,
	85	}
	86
		// Each iteration runs one step of the current mode, starting at
		// `offset`. The loop is only left through the `return`s inside it.
	87	loop {
	88	match mode {
	89	ParserMode::Plain => {
	90	// Start of port of Python’s `_parse_plain`
		// Skip a run of separators (whitespace or commas), remembering
		// whether at least one was seen.
	91	let mut whitespace = false;
	92	while let Some(&byte) = input.get(offset) {
	93	if is_space(byte) \|\| byte == b',' {
	94	whitespace = true;
	95	offset += 1;
	96	} else {
	97	break;
	98	}
	99	}
	100	if let Some(&byte) = input.get(offset) {
		// More input follows a separator run: the value accumulated so
		// far is complete.
	101	if whitespace {
	102	values.push(std::mem::take(&mut next_value))
	103	}
		// A double quote opens a quoted value only when nothing has been
		// accumulated for the current value yet.
	104	if byte == b'"' && next_value.is_empty() {
	105	mode = ParserMode::Quoted;
	106	} else {
		// `\"` inside a plain value: drop the backslash and keep the
		// double quote.
	107	if byte == b'"' && next_value.ends_with(b"\\") {
	108	next_value.pop();
	109	}
	110	next_value.push(byte);
	111	}
	112	offset += 1;
	113	} else {
		// End of input: the pending value (possibly empty) is the last one.
	114	values.push(next_value);
	115	return values;
	116	}
	117	}
	118	ParserMode::Quoted => {
	119	// Start of port of Python’s `_parse_quote`
		// `offset` is just past the opening double quote here.
	120	if let Some(&byte) = input.get(offset) {
	121	if byte == b'"' {
	122	// The input contains a quoted zero-length value `""`
	123	debug_assert_eq!(next_value, b"");
	124	values.push(std::mem::take(&mut next_value));
	125	offset += 1;
		// Skip any separators that follow the empty quoted value.
	126	while let Some(&byte) = input.get(offset) {
	127	if is_space(byte) \|\| byte == b',' {
	128	offset += 1;
	129	} else {
	130	break;
	131	}
	132	}
	133	mode = ParserMode::Plain;
	134	continue;
	135	}
	136	}
	137
		// Accumulate bytes up to the next double quote (or the end of the
		// input), turning `\"` into a plain double quote.
	138	while let Some(&byte) = input.get(offset) {
	139	if byte == b'"' {
	140	break;
	141	}
	142	if byte == b'\\' && input.get(offset + 1) == Some(&b'"') {
	143	next_value.push(b'"');
	144	offset += 2;
	145	} else {
	146	next_value.push(byte);
	147	offset += 1;
	148	}
	149	}
	150
	151	if offset >= input.len() {
	152	// We didn’t find a closing double-quote,
	153	// so treat the opening one as part of an unquoted value
	154	// instead of delimiting the start of a quoted value.
	155
	156	// `next_value` may have had some backslash-escapes
	157	// unescaped. TODO: shouldn’t we use a slice of `input`
	158	// instead?
	159	let mut real_values =
	160	parse_list_without_trim_start(&next_value);
	161
		// Put the opening double quote back in front of the first re-parsed
		// value, or emit it together with `next_value` if re-parsing
		// produced nothing.
	162	if let Some(first) = real_values.first_mut() {
	163	first.insert(0, b'"');
	164	// Drop `next_value`
	165	values.extend(real_values)
	166	} else {
	167	next_value.push(b'"');
	168	values.push(next_value);
	169	}
	170	return values;
	171	}
	172
	173	// We’re not at the end of the input, which means the `while`
	174	// loop above ended at a double quote. Skip
	175	// over that.
	176	offset += 1;
	177
		// Note: only spaces and commas are skipped here, not every byte
		// accepted by `is_space` as in plain mode.
	178	while let Some(&byte) = input.get(offset) {
	179	if byte == b' ' \|\| byte == b',' {
	180	offset += 1;
	181	} else {
	182	break;
	183	}
	184	}
	185
		// Nothing is left after the quoted value: it is the last value.
	186	if offset >= input.len() {
	187	values.push(next_value);
	188	return values;
	189	}
	190
		// If the only remaining byte is a double quote, append it to the
		// current value; otherwise the quoted value is complete.
	191	if offset + 1 == input.len() && input[offset] == b'"' {
	192	next_value.push(b'"');
	193	offset += 1;
	194	} else {
	195	values.push(std::mem::take(&mut next_value));
	196	}
	197
	198	mode = ParserMode::Plain;
	199	}
	200	}
	201	}
	202
	203	// https://docs.python.org/3/library/stdtypes.html?#bytes.isspace
		// True for space, tab, line feed, carriage return, vertical tab (0x0b)
		// and form feed (0x0c).
	204	fn is_space(byte: u8) -> bool {
	205	if let b' ' \| b'\t' \| b'\n' \| b'\r' \| b'\x0b' \| b'\x0c' = byte {
	206	true
	207	} else {
	208	false
	209	}
	210	}
	211	}
	212
	213	#[test]
	214	fn test_parse_list() {
		// Checks `parse_list` against a table of inputs and expected outputs.
	215	// Make `assert_eq` error messages nicer
		// Converts each value to a `String`; panics (via `unwrap`) if a value
		// is not valid UTF-8.
	216	fn as_strings(values: &[Vec<u8>]) -> Vec<String> {
	217	values
	218	.iter()
	219	.map(\|v\| std::str::from_utf8(v.as_ref()).unwrap().to_owned())
	220	.collect()
	221	}
		// `assert_parse_list!(input => [expected, ...])` asserts that
		// `parse_list(input)` equals the expected items, comparing both sides
		// after conversion with `as_strings`.
	222	macro_rules! assert_parse_list {
	223	( $input: expr => [ $( $output: expr ),* ] ) => {
	224	assert_eq!(
	225	as_strings(&parse_list($input)),
	226	as_strings(&[ $( Vec::from(&$output[..]) ),* ]),
	227	);
	228	}
	229	}
	230
	231	// Keep these Rust tests in sync with the Python ones in
	232	// `tests/test-config-parselist.py`
	233	assert_parse_list!(b"" => []);
	234	assert_parse_list!(b"," => []);
	235	assert_parse_list!(b"A" => [b"A"]);
	236	assert_parse_list!(b"B,B" => [b"B", b"B"]);
	237	assert_parse_list!(b", C, ,C," => [b"C", b"C"]);
	238	assert_parse_list!(b"\"" => [b"\""]);
	239	assert_parse_list!(b"\"\"" => [b"", b""]);
	240	assert_parse_list!(b"D,\"" => [b"D", b"\""]);
	241	assert_parse_list!(b"E,\"\"" => [b"E", b"", b""]);
	242	assert_parse_list!(b"\"F,F\"" => [b"F,F"]);
	243	assert_parse_list!(b"\"G,G" => [b"\"G", b"G"]);
	244	assert_parse_list!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]);
	245	assert_parse_list!(b"I,I\"" => [b"I", b"I\""]);
	246	assert_parse_list!(b"J,\"J" => [b"J", b"\"J"]);
	247	assert_parse_list!(b"K K" => [b"K", b"K"]);
	248	assert_parse_list!(b"\"K\" K" => [b"K", b"K"]);
	249	assert_parse_list!(b"L\tL" => [b"L", b"L"]);
	250	assert_parse_list!(b"\"L\"\tL" => [b"L", b"", b"L"]);
	251	assert_parse_list!(b"M\x0bM" => [b"M", b"M"]);
	252	assert_parse_list!(b"\"M\"\x0bM" => [b"M", b"", b"M"]);
	253	assert_parse_list!(b"\"N\" , ,\"" => [b"N\""]);
	254	assert_parse_list!(b"\" ,O, " => [b"\"", b"O"]);
	255	}
	256
45	257	#[test]
46	258	fn test_parse_byte_size() {
47	259	assert_eq!(parse_byte_size(b""), None);

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages