Show More
@@ -0,0 +1,52 b'' | |||
|
1 | """ | |
|
2 | List-valued configuration keys have an ad-hoc microsyntax. From `hg help config`: | |
|
3 | ||
|
4 | > List values are separated by whitespace or comma, except when values are | |
|
5 | > placed in double quotation marks: | |
|
6 | > | |
|
7 | > allow_read = "John Doe, PhD", brian, betty | |
|
8 | > | |
|
9 | > Quotation marks can be escaped by prefixing them with a backslash. Only | |
|
10 | > quotation marks at the beginning of a word is counted as a quotation | |
|
11 | > (e.g., ``foo"bar baz`` is the list of ``foo"bar`` and ``baz``). | |
|
12 | ||
|
13 | That help documentation is fairly light on details, the actual parser has many | |
|
14 | other edge cases. This test tries to cover them. | |
|
15 | """ | |
|
16 | ||
|
17 | from mercurial.utils import stringutil | |
|
18 | ||
|
19 | ||
|
20 | def assert_parselist(input, expected): | |
|
21 | result = stringutil.parselist(input) | |
|
22 | if result != expected: | |
|
23 | raise AssertionError( | |
|
24 | "parse_input(%r)\n got %r\nexpected %r" | |
|
25 | % (input, result, expected) | |
|
26 | ) | |
|
27 | ||
|
28 | ||
|
29 | # Keep these Python tests in sync with the Rust ones in `rust/hg-core/src/config/values.rs` | |
|
30 | ||
|
31 | assert_parselist(b'', []) | |
|
32 | assert_parselist(b',', []) | |
|
33 | assert_parselist(b'A', [b'A']) | |
|
34 | assert_parselist(b'B,B', [b'B', b'B']) | |
|
35 | assert_parselist(b', C, ,C,', [b'C', b'C']) | |
|
36 | assert_parselist(b'"', [b'"']) | |
|
37 | assert_parselist(b'""', [b'', b'']) | |
|
38 | assert_parselist(b'D,"', [b'D', b'"']) | |
|
39 | assert_parselist(b'E,""', [b'E', b'', b'']) | |
|
40 | assert_parselist(b'"F,F"', [b'F,F']) | |
|
41 | assert_parselist(b'"G,G', [b'"G', b'G']) | |
|
42 | assert_parselist(b'"H \\",\\"H', [b'"H', b',', b'H']) | |
|
43 | assert_parselist(b'I,I"', [b'I', b'I"']) | |
|
44 | assert_parselist(b'J,"J', [b'J', b'"J']) | |
|
45 | assert_parselist(b'K K', [b'K', b'K']) | |
|
46 | assert_parselist(b'"K" K', [b'K', b'K']) | |
|
47 | assert_parselist(b'L\tL', [b'L', b'L']) | |
|
48 | assert_parselist(b'"L"\tL', [b'L', b'', b'L']) | |
|
49 | assert_parselist(b'M\x0bM', [b'M', b'M']) | |
|
50 | assert_parselist(b'"M"\x0bM', [b'M', b'', b'M']) | |
|
51 | assert_parselist(b'"N" , ,"', [b'N"']) | |
|
52 | assert_parselist(b'" ,O, ', [b'"', b'O']) |
@@ -388,6 +388,16 b' impl Config {' | |||
|
388 | 388 | }) |
|
389 | 389 | } |
|
390 | 390 | |
|
391 | /// If there is an `item` value in `section`, parse and return a list of | |
|
392 | /// byte strings. | |
|
393 | pub fn get_list( | |
|
394 | &self, | |
|
395 | section: &[u8], | |
|
396 | item: &[u8], | |
|
397 | ) -> Option<Vec<Vec<u8>>> { | |
|
398 | self.get(section, item).map(values::parse_list) | |
|
399 | } | |
|
400 | ||
|
391 | 401 | /// Returns the raw value bytes of the first one found, or `None`. |
|
392 | 402 | pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> { |
|
393 | 403 | self.get_inner(section, item) |
@@ -8,6 +8,8 b'' | |||
|
8 | 8 | //! details about where the value came from (but omits details of what’s |
|
9 | 9 | //! invalid inside the value). |
|
10 | 10 | |
|
11 | use crate::utils::SliceExt; | |
|
12 | ||
|
11 | 13 | pub(super) fn parse_bool(v: &[u8]) -> Option<bool> { |
|
12 | 14 | match v.to_ascii_lowercase().as_slice() { |
|
13 | 15 | b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true), |
@@ -42,6 +44,216 b' pub(super) fn parse_byte_size(value: &[u' | |||
|
42 | 44 | value.parse().ok() |
|
43 | 45 | } |
|
44 | 46 | |
|
47 | /// Parse a config value as a list of sub-values. | |
|
48 | /// | |
|
49 | /// Ported from `parselist` in `mercurial/utils/stringutil.py` | |
|
50 | ||
|
51 | // Note: keep behavior in sync with the Python one. | |
|
52 | ||
|
53 | // Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when | |
|
54 | // possible (when there’s no backslash-escapes) but this is probably not worth | |
|
55 | // the complexity as config is presumably not accessed inside | |
|
56 | // performance-sensitive loops. | |
|
57 | pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> { | |
|
58 | // Port of Python’s `value.lstrip(b' ,\n')` | |
|
59 | // TODO: is this really what we want? | |
|
60 | let input = | |
|
61 | input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n'); | |
|
62 | parse_list_without_trim_start(input) | |
|
63 | } | |
|
64 | ||
|
65 | fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> { | |
|
66 | // Start of port of Python’s `_configlist` | |
|
67 | let input = input.trim_end_matches(|b| b == b' ' || b == b','); | |
|
68 | if input.is_empty() { | |
|
69 | return Vec::new(); | |
|
70 | } | |
|
71 | ||
|
72 | // Just to make “a string” less confusable with “a list of strings”. | |
|
73 | type ByteString = Vec<u8>; | |
|
74 | ||
|
75 | // These correspond to Python’s… | |
|
76 | let mut mode = ParserMode::Plain; // `parser` | |
|
77 | let mut values = Vec::new(); // `parts[:-1]` | |
|
78 | let mut next_value = ByteString::new(); // `parts[-1]` | |
|
79 | let mut offset = 0; // `offset` | |
|
80 | ||
|
81 | // Setting `parser` to `None` is instead handled by returning immediately | |
|
82 | enum ParserMode { | |
|
83 | Plain, | |
|
84 | Quoted, | |
|
85 | } | |
|
86 | ||
|
87 | loop { | |
|
88 | match mode { | |
|
89 | ParserMode::Plain => { | |
|
90 | // Start of port of Python’s `_parse_plain` | |
|
91 | let mut whitespace = false; | |
|
92 | while let Some(&byte) = input.get(offset) { | |
|
93 | if is_space(byte) || byte == b',' { | |
|
94 | whitespace = true; | |
|
95 | offset += 1; | |
|
96 | } else { | |
|
97 | break; | |
|
98 | } | |
|
99 | } | |
|
100 | if let Some(&byte) = input.get(offset) { | |
|
101 | if whitespace { | |
|
102 | values.push(std::mem::take(&mut next_value)) | |
|
103 | } | |
|
104 | if byte == b'"' && next_value.is_empty() { | |
|
105 | mode = ParserMode::Quoted; | |
|
106 | } else { | |
|
107 | if byte == b'"' && next_value.ends_with(b"\\") { | |
|
108 | next_value.pop(); | |
|
109 | } | |
|
110 | next_value.push(byte); | |
|
111 | } | |
|
112 | offset += 1; | |
|
113 | } else { | |
|
114 | values.push(next_value); | |
|
115 | return values; | |
|
116 | } | |
|
117 | } | |
|
118 | ParserMode::Quoted => { | |
|
119 | // Start of port of Python’s `_parse_quote` | |
|
120 | if let Some(&byte) = input.get(offset) { | |
|
121 | if byte == b'"' { | |
|
122 | // The input contains a quoted zero-length value `""` | |
|
123 | debug_assert_eq!(next_value, b""); | |
|
124 | values.push(std::mem::take(&mut next_value)); | |
|
125 | offset += 1; | |
|
126 | while let Some(&byte) = input.get(offset) { | |
|
127 | if is_space(byte) || byte == b',' { | |
|
128 | offset += 1; | |
|
129 | } else { | |
|
130 | break; | |
|
131 | } | |
|
132 | } | |
|
133 | mode = ParserMode::Plain; | |
|
134 | continue; | |
|
135 | } | |
|
136 | } | |
|
137 | ||
|
138 | while let Some(&byte) = input.get(offset) { | |
|
139 | if byte == b'"' { | |
|
140 | break; | |
|
141 | } | |
|
142 | if byte == b'\\' && input.get(offset + 1) == Some(&b'"') { | |
|
143 | next_value.push(b'"'); | |
|
144 | offset += 2; | |
|
145 | } else { | |
|
146 | next_value.push(byte); | |
|
147 | offset += 1; | |
|
148 | } | |
|
149 | } | |
|
150 | ||
|
151 | if offset >= input.len() { | |
|
152 | // We didn’t find a closing double-quote, | |
|
153 | // so treat the opening one as part of an unquoted value | |
|
154 | // instead of delimiting the start of a quoted value. | |
|
155 | ||
|
156 | // `next_value` may have had some backslash-escapes | |
|
157 | // unescaped. TODO: shouldn’t we use a slice of `input` | |
|
158 | // instead? | |
|
159 | let mut real_values = | |
|
160 | parse_list_without_trim_start(&next_value); | |
|
161 | ||
|
162 | if let Some(first) = real_values.first_mut() { | |
|
163 | first.insert(0, b'"'); | |
|
164 | // Drop `next_value` | |
|
165 | values.extend(real_values) | |
|
166 | } else { | |
|
167 | next_value.push(b'"'); | |
|
168 | values.push(next_value); | |
|
169 | } | |
|
170 | return values; | |
|
171 | } | |
|
172 | ||
|
173 | // We’re not at the end of the input, which means the `while` | |
|
174 | // loop above ended at a double quote. Skip | |
|
175 | // over that. | |
|
176 | offset += 1; | |
|
177 | ||
|
178 | while let Some(&byte) = input.get(offset) { | |
|
179 | if byte == b' ' || byte == b',' { | |
|
180 | offset += 1; | |
|
181 | } else { | |
|
182 | break; | |
|
183 | } | |
|
184 | } | |
|
185 | ||
|
186 | if offset >= input.len() { | |
|
187 | values.push(next_value); | |
|
188 | return values; | |
|
189 | } | |
|
190 | ||
|
191 | if offset + 1 == input.len() && input[offset] == b'"' { | |
|
192 | next_value.push(b'"'); | |
|
193 | offset += 1; | |
|
194 | } else { | |
|
195 | values.push(std::mem::take(&mut next_value)); | |
|
196 | } | |
|
197 | ||
|
198 | mode = ParserMode::Plain; | |
|
199 | } | |
|
200 | } | |
|
201 | } | |
|
202 | ||
|
203 | // https://docs.python.org/3/library/stdtypes.html?#bytes.isspace | |
|
204 | fn is_space(byte: u8) -> bool { | |
|
205 | if let b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c' = byte { | |
|
206 | true | |
|
207 | } else { | |
|
208 | false | |
|
209 | } | |
|
210 | } | |
|
211 | } | |
|
212 | ||
|
213 | #[test] | |
|
214 | fn test_parse_list() { | |
|
215 | // Make `assert_eq` error messages nicer | |
|
216 | fn as_strings(values: &[Vec<u8>]) -> Vec<String> { | |
|
217 | values | |
|
218 | .iter() | |
|
219 | .map(|v| std::str::from_utf8(v.as_ref()).unwrap().to_owned()) | |
|
220 | .collect() | |
|
221 | } | |
|
222 | macro_rules! assert_parse_list { | |
|
223 | ( $input: expr => [ $( $output: expr ),* ] ) => { | |
|
224 | assert_eq!( | |
|
225 | as_strings(&parse_list($input)), | |
|
226 | as_strings(&[ $( Vec::from(&$output[..]) ),* ]), | |
|
227 | ); | |
|
228 | } | |
|
229 | } | |
|
230 | ||
|
231 | // Keep these Rust tests in sync with the Python ones in | |
|
232 | // `tests/test-config-parselist.py` | |
|
233 | assert_parse_list!(b"" => []); | |
|
234 | assert_parse_list!(b"," => []); | |
|
235 | assert_parse_list!(b"A" => [b"A"]); | |
|
236 | assert_parse_list!(b"B,B" => [b"B", b"B"]); | |
|
237 | assert_parse_list!(b", C, ,C," => [b"C", b"C"]); | |
|
238 | assert_parse_list!(b"\"" => [b"\""]); | |
|
239 | assert_parse_list!(b"\"\"" => [b"", b""]); | |
|
240 | assert_parse_list!(b"D,\"" => [b"D", b"\""]); | |
|
241 | assert_parse_list!(b"E,\"\"" => [b"E", b"", b""]); | |
|
242 | assert_parse_list!(b"\"F,F\"" => [b"F,F"]); | |
|
243 | assert_parse_list!(b"\"G,G" => [b"\"G", b"G"]); | |
|
244 | assert_parse_list!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]); | |
|
245 | assert_parse_list!(b"I,I\"" => [b"I", b"I\""]); | |
|
246 | assert_parse_list!(b"J,\"J" => [b"J", b"\"J"]); | |
|
247 | assert_parse_list!(b"K K" => [b"K", b"K"]); | |
|
248 | assert_parse_list!(b"\"K\" K" => [b"K", b"K"]); | |
|
249 | assert_parse_list!(b"L\tL" => [b"L", b"L"]); | |
|
250 | assert_parse_list!(b"\"L\"\tL" => [b"L", b"", b"L"]); | |
|
251 | assert_parse_list!(b"M\x0bM" => [b"M", b"M"]); | |
|
252 | assert_parse_list!(b"\"M\"\x0bM" => [b"M", b"", b"M"]); | |
|
253 | assert_parse_list!(b"\"N\" , ,\"" => [b"N\""]); | |
|
254 | assert_parse_list!(b"\" ,O, " => [b"\"", b"O"]); | |
|
255 | } | |
|
256 | ||
|
45 | 257 | #[test] |
|
46 | 258 | fn test_parse_byte_size() { |
|
47 | 259 | assert_eq!(parse_byte_size(b""), None); |
General Comments 0
You need to be logged in to leave comments.
Login now