##// END OF EJS Templates
rhg: Port Python’s `ui.configlist` as `Config::get_list`...
Simon Sapin -
r48762:6961eca0 default
parent child Browse files
Show More
@@ -0,0 +1,52 b''
1 """
2 List-valued configuration keys have an ad-hoc microsyntax. From `hg help config`:
3
4 > List values are separated by whitespace or comma, except when values are
5 > placed in double quotation marks:
6 >
7 > allow_read = "John Doe, PhD", brian, betty
8 >
9 > Quotation marks can be escaped by prefixing them with a backslash. Only
10 > quotation marks at the beginning of a word is counted as a quotation
11 > (e.g., ``foo"bar baz`` is the list of ``foo"bar`` and ``baz``).
12
13 That help documentation is fairly light on details; the actual parser has many
14 other edge cases. This test tries to cover them.
15 """
16
17 from mercurial.utils import stringutil
18
19
def assert_parselist(input, expected):
    """Check that `stringutil.parselist(input)` returns `expected`.

    Raises an AssertionError that shows the input, the value actually
    returned and the expected value when the two differ.
    """
    result = stringutil.parselist(input)
    if result != expected:
        # Name the function that is really under test in the message
        raise AssertionError(
            "parselist(%r)\n got %r\nexpected %r"
            % (input, result, expected)
        )
27
28
# Keep these Python tests in sync with the Rust ones in
# `rust/hg-core/src/config/values.rs`

# Each entry is (raw config value, expected list of parsed items).
PARSELIST_CASES = [
    (b'', []),
    (b',', []),
    (b'A', [b'A']),
    (b'B,B', [b'B', b'B']),
    (b', C, ,C,', [b'C', b'C']),
    (b'"', [b'"']),
    (b'""', [b'', b'']),
    (b'D,"', [b'D', b'"']),
    (b'E,""', [b'E', b'', b'']),
    (b'"F,F"', [b'F,F']),
    (b'"G,G', [b'"G', b'G']),
    (b'"H \\",\\"H', [b'"H', b',', b'H']),
    (b'I,I"', [b'I', b'I"']),
    (b'J,"J', [b'J', b'"J']),
    (b'K K', [b'K', b'K']),
    (b'"K" K', [b'K', b'K']),
    (b'L\tL', [b'L', b'L']),
    (b'"L"\tL', [b'L', b'', b'L']),
    (b'M\x0bM', [b'M', b'M']),
    (b'"M"\x0bM', [b'M', b'', b'M']),
    (b'"N" , ,"', [b'N"']),
    (b'" ,O, ', [b'"', b'O']),
]

# Run the cases in order; the first mismatch raises and stops the script.
for raw_value, expected_items in PARSELIST_CASES:
    assert_parselist(raw_value, expected_items)
@@ -388,6 +388,16 b' impl Config {'
388 388 })
389 389 }
390 390
391 /// If there is an `item` value in `section`, parse and return a list of
392 /// byte strings.
393 pub fn get_list(
394 &self,
395 section: &[u8],
396 item: &[u8],
397 ) -> Option<Vec<Vec<u8>>> {
398 self.get(section, item).map(values::parse_list)
399 }
400
391 401 /// Returns the raw value bytes of the first one found, or `None`.
392 402 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
393 403 self.get_inner(section, item)
@@ -8,6 +8,8 b''
8 8 //! details about where the value came from (but omits details of what’s
9 9 //! invalid inside the value).
10 10
11 use crate::utils::SliceExt;
12
11 13 pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {
12 14 match v.to_ascii_lowercase().as_slice() {
13 15 b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true),
@@ -42,6 +44,216 b' pub(super) fn parse_byte_size(value: &[u'
42 44 value.parse().ok()
43 45 }
44 46
47 /// Parse a config value as a list of sub-values.
48 ///
49 /// Ported from `parselist` in `mercurial/utils/stringutil.py`
50
51 // Note: keep behavior in sync with the Python one.
52
53 // Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when
54 // possible (when there’s no backslash-escapes) but this is probably not worth
55 // the complexity as config is presumably not accessed inside
56 // preformance-sensitive loops.
57 pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {
58 // Port of Python’s `value.lstrip(b' ,\n')`
59 // TODO: is this really what we want?
60 let input =
61 input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n');
62 parse_list_without_trim_start(input)
63 }
64
65 fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {
66 // Start of port of Python’s `_configlist`
67 let input = input.trim_end_matches(|b| b == b' ' || b == b',');
68 if input.is_empty() {
69 return Vec::new();
70 }
71
72 // Just to make “a string” less confusable with “a list of strings”.
73 type ByteString = Vec<u8>;
74
75 // These correspond to Python’s…
76 let mut mode = ParserMode::Plain; // `parser`
77 let mut values = Vec::new(); // `parts[:-1]`
78 let mut next_value = ByteString::new(); // `parts[-1]`
79 let mut offset = 0; // `offset`
80
81 // Setting `parser` to `None` is instead handled by returning immediately
82 enum ParserMode {
83 Plain,
84 Quoted,
85 }
86
87 loop {
88 match mode {
89 ParserMode::Plain => {
90 // Start of port of Python’s `_parse_plain`
91 let mut whitespace = false;
92 while let Some(&byte) = input.get(offset) {
93 if is_space(byte) || byte == b',' {
94 whitespace = true;
95 offset += 1;
96 } else {
97 break;
98 }
99 }
100 if let Some(&byte) = input.get(offset) {
101 if whitespace {
102 values.push(std::mem::take(&mut next_value))
103 }
104 if byte == b'"' && next_value.is_empty() {
105 mode = ParserMode::Quoted;
106 } else {
107 if byte == b'"' && next_value.ends_with(b"\\") {
108 next_value.pop();
109 }
110 next_value.push(byte);
111 }
112 offset += 1;
113 } else {
114 values.push(next_value);
115 return values;
116 }
117 }
118 ParserMode::Quoted => {
119 // Start of port of Python’s `_parse_quote`
120 if let Some(&byte) = input.get(offset) {
121 if byte == b'"' {
122 // The input contains a quoted zero-length value `""`
123 debug_assert_eq!(next_value, b"");
124 values.push(std::mem::take(&mut next_value));
125 offset += 1;
126 while let Some(&byte) = input.get(offset) {
127 if is_space(byte) || byte == b',' {
128 offset += 1;
129 } else {
130 break;
131 }
132 }
133 mode = ParserMode::Plain;
134 continue;
135 }
136 }
137
138 while let Some(&byte) = input.get(offset) {
139 if byte == b'"' {
140 break;
141 }
142 if byte == b'\\' && input.get(offset + 1) == Some(&b'"') {
143 next_value.push(b'"');
144 offset += 2;
145 } else {
146 next_value.push(byte);
147 offset += 1;
148 }
149 }
150
151 if offset >= input.len() {
152 // We didn’t find a closing double-quote,
153 // so treat the opening one as part of an unquoted value
154 // instead of delimiting the start of a quoted value.
155
156 // `next_value` may have had some backslash-escapes
157 // unescaped. TODO: shouldn’t we use a slice of `input`
158 // instead?
159 let mut real_values =
160 parse_list_without_trim_start(&next_value);
161
162 if let Some(first) = real_values.first_mut() {
163 first.insert(0, b'"');
164 // Drop `next_value`
165 values.extend(real_values)
166 } else {
167 next_value.push(b'"');
168 values.push(next_value);
169 }
170 return values;
171 }
172
173 // We’re not at the end of the input, which means the `while`
174 // loop above ended at at double quote. Skip
175 // over that.
176 offset += 1;
177
178 while let Some(&byte) = input.get(offset) {
179 if byte == b' ' || byte == b',' {
180 offset += 1;
181 } else {
182 break;
183 }
184 }
185
186 if offset >= input.len() {
187 values.push(next_value);
188 return values;
189 }
190
191 if offset + 1 == input.len() && input[offset] == b'"' {
192 next_value.push(b'"');
193 offset += 1;
194 } else {
195 values.push(std::mem::take(&mut next_value));
196 }
197
198 mode = ParserMode::Plain;
199 }
200 }
201 }
202
203 // https://docs.python.org/3/library/stdtypes.html?#bytes.isspace
204 fn is_space(byte: u8) -> bool {
205 if let b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c' = byte {
206 true
207 } else {
208 false
209 }
210 }
211 }
212
#[test]
fn test_parse_list() {
    // Compare as text rather than as byte vectors so that `assert_eq`
    // failures are easier to read
    fn readable(list: &[Vec<u8>]) -> Vec<String> {
        let mut rendered = Vec::with_capacity(list.len());
        for bytes in list {
            rendered.push(std::str::from_utf8(bytes).unwrap().to_owned());
        }
        rendered
    }
    macro_rules! check {
        ( $input: expr => [ $( $output: expr ),* ] ) => {{
            let expected: Vec<Vec<u8>> = vec![ $( $output.to_vec() ),* ];
            assert_eq!(readable(&parse_list($input)), readable(&expected));
        }};
    }

    // Keep these Rust tests in sync with the Python ones in
    // `tests/test-config-parselist.py`
    check!(b"" => []);
    check!(b"," => []);
    check!(b"A" => [b"A"]);
    check!(b"B,B" => [b"B", b"B"]);
    check!(b", C, ,C," => [b"C", b"C"]);
    check!(b"\"" => [b"\""]);
    check!(b"\"\"" => [b"", b""]);
    check!(b"D,\"" => [b"D", b"\""]);
    check!(b"E,\"\"" => [b"E", b"", b""]);
    check!(b"\"F,F\"" => [b"F,F"]);
    check!(b"\"G,G" => [b"\"G", b"G"]);
    check!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]);
    check!(b"I,I\"" => [b"I", b"I\""]);
    check!(b"J,\"J" => [b"J", b"\"J"]);
    check!(b"K K" => [b"K", b"K"]);
    check!(b"\"K\" K" => [b"K", b"K"]);
    check!(b"L\tL" => [b"L", b"L"]);
    check!(b"\"L\"\tL" => [b"L", b"", b"L"]);
    check!(b"M\x0bM" => [b"M", b"M"]);
    check!(b"\"M\"\x0bM" => [b"M", b"", b"M"]);
    check!(b"\"N\" , ,\"" => [b"N\""]);
    check!(b"\" ,O, " => [b"\"", b"O"]);
}
256
45 257 #[test]
46 258 fn test_parse_byte_size() {
47 259 assert_eq!(parse_byte_size(b""), None);
General Comments 0
You need to be logged in to leave comments. Login now