Show More
@@ -0,0 +1,52 b'' | |||||
|
1 | """ | |||
|
2 | List-valued configuration keys have an ad-hoc microsyntax. From `hg help config`: | |||
|
3 | ||||
|
4 | > List values are separated by whitespace or comma, except when values are | |||
|
5 | > placed in double quotation marks: | |||
|
6 | > | |||
|
7 | > allow_read = "John Doe, PhD", brian, betty | |||
|
8 | > | |||
|
9 | > Quotation marks can be escaped by prefixing them with a backslash. Only | |||
|
10 | > quotation marks at the beginning of a word is counted as a quotation | |||
|
11 | > (e.g., ``foo"bar baz`` is the list of ``foo"bar`` and ``baz``). | |||
|
12 | ||||
|
13 | That help documentation is fairly light on details; the actual parser has many | |||
|
14 | other edge cases. This test tries to cover them. | |||
|
15 | """ | |||
|
16 | ||||
|
17 | from mercurial.utils import stringutil | |||
|
18 | ||||
|
19 | ||||
|
def assert_parselist(input, expected):
    """Check that ``stringutil.parselist(input)`` returns ``expected``.

    Raises an AssertionError that shows the input, the value actually
    returned and the expected value when the two differ.
    """
    result = stringutil.parselist(input)
    if result != expected:
        # Name the function that was really called so that a failure report
        # points at `parselist` rather than at a function that does not exist.
        raise AssertionError(
            "parselist(%r)\n got %r\nexpected %r"
            % (input, result, expected)
        )
|
27 | ||||
|
28 | ||||
|
# The Rust port has the same list of cases in
# `rust/hg-core/src/config/values.rs`; keep the two in sync.
#
# Each entry is (raw config value, expected parsed list). Cases are checked in
# order and the first mismatch raises.
for raw, expected in (
    (b'', []),
    (b',', []),
    (b'A', [b'A']),
    (b'B,B', [b'B', b'B']),
    (b', C, ,C,', [b'C', b'C']),
    (b'"', [b'"']),
    (b'""', [b'', b'']),
    (b'D,"', [b'D', b'"']),
    (b'E,""', [b'E', b'', b'']),
    (b'"F,F"', [b'F,F']),
    (b'"G,G', [b'"G', b'G']),
    (b'"H \\",\\"H', [b'"H', b',', b'H']),
    (b'I,I"', [b'I', b'I"']),
    (b'J,"J', [b'J', b'"J']),
    (b'K K', [b'K', b'K']),
    (b'"K" K', [b'K', b'K']),
    (b'L\tL', [b'L', b'L']),
    (b'"L"\tL', [b'L', b'', b'L']),
    (b'M\x0bM', [b'M', b'M']),
    (b'"M"\x0bM', [b'M', b'', b'M']),
    (b'"N" , ,"', [b'N"']),
    (b'" ,O, ', [b'"', b'O']),
):
    assert_parselist(raw, expected)
@@ -388,6 +388,16 b' impl Config {' | |||||
388 | }) |
|
388 | }) | |
389 | } |
|
389 | } | |
390 |
|
390 | |||
|
391 | /// If there is an `item` value in `section`, parse and return a list of | |||
|
392 | /// byte strings. | |||
|
393 | pub fn get_list( | |||
|
394 | &self, | |||
|
395 | section: &[u8], | |||
|
396 | item: &[u8], | |||
|
397 | ) -> Option<Vec<Vec<u8>>> { | |||
|
398 | self.get(section, item).map(values::parse_list) | |||
|
399 | } | |||
|
400 | ||||
391 | /// Returns the raw value bytes of the first one found, or `None`. |
|
401 | /// Returns the raw value bytes of the first one found, or `None`. | |
392 | pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> { |
|
402 | pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> { | |
393 | self.get_inner(section, item) |
|
403 | self.get_inner(section, item) |
@@ -8,6 +8,8 b'' | |||||
8 | //! details about where the value came from (but omits details of what’s |
|
8 | //! details about where the value came from (but omits details of what’s | |
9 | //! invalid inside the value). |
|
9 | //! invalid inside the value). | |
10 |
|
10 | |||
|
11 | use crate::utils::SliceExt; | |||
|
12 | ||||
11 | pub(super) fn parse_bool(v: &[u8]) -> Option<bool> { |
|
13 | pub(super) fn parse_bool(v: &[u8]) -> Option<bool> { | |
12 | match v.to_ascii_lowercase().as_slice() { |
|
14 | match v.to_ascii_lowercase().as_slice() { | |
13 | b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true), |
|
15 | b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true), | |
@@ -42,6 +44,216 b' pub(super) fn parse_byte_size(value: &[u' | |||||
42 | value.parse().ok() |
|
44 | value.parse().ok() | |
43 | } |
|
45 | } | |
44 |
|
46 | |||
|
47 | /// Parse a config value as a list of sub-values. | |||
|
48 | /// | |||
|
49 | /// Ported from `parselist` in `mercurial/utils/stringutil.py` | |||
|
50 | ||||
|
51 | // Note: keep behavior in sync with the Python one. | |||
|
52 | ||||
|
53 | // Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when | |||
|
54 | // possible (when there’s no backslash-escapes) but this is probably not worth | |||
|
55 | // the complexity as config is presumably not accessed inside | |||
|
56 | // preformance-sensitive loops. | |||
|
57 | pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> { | |||
|
58 | // Port of Python’s `value.lstrip(b' ,\n')` | |||
|
59 | // TODO: is this really what we want? | |||
|
60 | let input = | |||
|
61 | input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n'); | |||
|
62 | parse_list_without_trim_start(input) | |||
|
63 | } | |||
|
64 | ||||
|
65 | fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> { | |||
|
66 | // Start of port of Python’s `_configlist` | |||
|
67 | let input = input.trim_end_matches(|b| b == b' ' || b == b','); | |||
|
68 | if input.is_empty() { | |||
|
69 | return Vec::new(); | |||
|
70 | } | |||
|
71 | ||||
|
72 | // Just to make “a string” less confusable with “a list of strings”. | |||
|
73 | type ByteString = Vec<u8>; | |||
|
74 | ||||
|
75 | // These correspond to Python’s… | |||
|
76 | let mut mode = ParserMode::Plain; // `parser` | |||
|
77 | let mut values = Vec::new(); // `parts[:-1]` | |||
|
78 | let mut next_value = ByteString::new(); // `parts[-1]` | |||
|
79 | let mut offset = 0; // `offset` | |||
|
80 | ||||
|
81 | // Setting `parser` to `None` is instead handled by returning immediately | |||
|
82 | enum ParserMode { | |||
|
83 | Plain, | |||
|
84 | Quoted, | |||
|
85 | } | |||
|
86 | ||||
|
87 | loop { | |||
|
88 | match mode { | |||
|
89 | ParserMode::Plain => { | |||
|
90 | // Start of port of Python’s `_parse_plain` | |||
|
91 | let mut whitespace = false; | |||
|
92 | while let Some(&byte) = input.get(offset) { | |||
|
93 | if is_space(byte) || byte == b',' { | |||
|
94 | whitespace = true; | |||
|
95 | offset += 1; | |||
|
96 | } else { | |||
|
97 | break; | |||
|
98 | } | |||
|
99 | } | |||
|
100 | if let Some(&byte) = input.get(offset) { | |||
|
101 | if whitespace { | |||
|
102 | values.push(std::mem::take(&mut next_value)) | |||
|
103 | } | |||
|
104 | if byte == b'"' && next_value.is_empty() { | |||
|
105 | mode = ParserMode::Quoted; | |||
|
106 | } else { | |||
|
107 | if byte == b'"' && next_value.ends_with(b"\\") { | |||
|
108 | next_value.pop(); | |||
|
109 | } | |||
|
110 | next_value.push(byte); | |||
|
111 | } | |||
|
112 | offset += 1; | |||
|
113 | } else { | |||
|
114 | values.push(next_value); | |||
|
115 | return values; | |||
|
116 | } | |||
|
117 | } | |||
|
118 | ParserMode::Quoted => { | |||
|
119 | // Start of port of Python’s `_parse_quote` | |||
|
120 | if let Some(&byte) = input.get(offset) { | |||
|
121 | if byte == b'"' { | |||
|
122 | // The input contains a quoted zero-length value `""` | |||
|
123 | debug_assert_eq!(next_value, b""); | |||
|
124 | values.push(std::mem::take(&mut next_value)); | |||
|
125 | offset += 1; | |||
|
126 | while let Some(&byte) = input.get(offset) { | |||
|
127 | if is_space(byte) || byte == b',' { | |||
|
128 | offset += 1; | |||
|
129 | } else { | |||
|
130 | break; | |||
|
131 | } | |||
|
132 | } | |||
|
133 | mode = ParserMode::Plain; | |||
|
134 | continue; | |||
|
135 | } | |||
|
136 | } | |||
|
137 | ||||
|
138 | while let Some(&byte) = input.get(offset) { | |||
|
139 | if byte == b'"' { | |||
|
140 | break; | |||
|
141 | } | |||
|
142 | if byte == b'\\' && input.get(offset + 1) == Some(&b'"') { | |||
|
143 | next_value.push(b'"'); | |||
|
144 | offset += 2; | |||
|
145 | } else { | |||
|
146 | next_value.push(byte); | |||
|
147 | offset += 1; | |||
|
148 | } | |||
|
149 | } | |||
|
150 | ||||
|
151 | if offset >= input.len() { | |||
|
152 | // We didn’t find a closing double-quote, | |||
|
153 | // so treat the opening one as part of an unquoted value | |||
|
154 | // instead of delimiting the start of a quoted value. | |||
|
155 | ||||
|
156 | // `next_value` may have had some backslash-escapes | |||
|
157 | // unescaped. TODO: shouldn’t we use a slice of `input` | |||
|
158 | // instead? | |||
|
159 | let mut real_values = | |||
|
160 | parse_list_without_trim_start(&next_value); | |||
|
161 | ||||
|
162 | if let Some(first) = real_values.first_mut() { | |||
|
163 | first.insert(0, b'"'); | |||
|
164 | // Drop `next_value` | |||
|
165 | values.extend(real_values) | |||
|
166 | } else { | |||
|
167 | next_value.push(b'"'); | |||
|
168 | values.push(next_value); | |||
|
169 | } | |||
|
170 | return values; | |||
|
171 | } | |||
|
172 | ||||
|
173 | // We’re not at the end of the input, which means the `while` | |||
|
174 | // loop above ended at at double quote. Skip | |||
|
175 | // over that. | |||
|
176 | offset += 1; | |||
|
177 | ||||
|
178 | while let Some(&byte) = input.get(offset) { | |||
|
179 | if byte == b' ' || byte == b',' { | |||
|
180 | offset += 1; | |||
|
181 | } else { | |||
|
182 | break; | |||
|
183 | } | |||
|
184 | } | |||
|
185 | ||||
|
186 | if offset >= input.len() { | |||
|
187 | values.push(next_value); | |||
|
188 | return values; | |||
|
189 | } | |||
|
190 | ||||
|
191 | if offset + 1 == input.len() && input[offset] == b'"' { | |||
|
192 | next_value.push(b'"'); | |||
|
193 | offset += 1; | |||
|
194 | } else { | |||
|
195 | values.push(std::mem::take(&mut next_value)); | |||
|
196 | } | |||
|
197 | ||||
|
198 | mode = ParserMode::Plain; | |||
|
199 | } | |||
|
200 | } | |||
|
201 | } | |||
|
202 | ||||
|
203 | // https://docs.python.org/3/library/stdtypes.html?#bytes.isspace | |||
|
204 | fn is_space(byte: u8) -> bool { | |||
|
205 | if let b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c' = byte { | |||
|
206 | true | |||
|
207 | } else { | |||
|
208 | false | |||
|
209 | } | |||
|
210 | } | |||
|
211 | } | |||
|
212 | ||||
|
#[test]
fn test_parse_list() {
    // Values are compared as strings rather than as byte vectors so that a
    // failing `assert_eq` prints readable text.
    fn readable(items: &[Vec<u8>]) -> Vec<String> {
        let mut texts = Vec::with_capacity(items.len());
        for item in items {
            texts.push(std::str::from_utf8(item).unwrap().to_owned());
        }
        texts
    }

    // `check!(input => [expected, ...])` parses `input` with `parse_list`
    // and compares the result with the expected byte strings.
    macro_rules! check {
        ( $raw: expr => [ $( $item: expr ),* ] ) => {{
            let expected: Vec<Vec<u8>> = vec![ $( $item.to_vec() ),* ];
            assert_eq!(readable(&parse_list($raw)), readable(&expected));
        }}
    }

    // The Python test suite has the same list of cases in
    // `tests/test-config-parselist.py`; keep the two in sync.
    check!(b"" => []);
    check!(b"," => []);
    check!(b"A" => [b"A"]);
    check!(b"B,B" => [b"B", b"B"]);
    check!(b", C, ,C," => [b"C", b"C"]);
    check!(b"\"" => [b"\""]);
    check!(b"\"\"" => [b"", b""]);
    check!(b"D,\"" => [b"D", b"\""]);
    check!(b"E,\"\"" => [b"E", b"", b""]);
    check!(b"\"F,F\"" => [b"F,F"]);
    check!(b"\"G,G" => [b"\"G", b"G"]);
    check!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]);
    check!(b"I,I\"" => [b"I", b"I\""]);
    check!(b"J,\"J" => [b"J", b"\"J"]);
    check!(b"K K" => [b"K", b"K"]);
    check!(b"\"K\" K" => [b"K", b"K"]);
    check!(b"L\tL" => [b"L", b"L"]);
    check!(b"\"L\"\tL" => [b"L", b"", b"L"]);
    check!(b"M\x0bM" => [b"M", b"M"]);
    check!(b"\"M\"\x0bM" => [b"M", b"", b"M"]);
    check!(b"\"N\" , ,\"" => [b"N\""]);
    check!(b"\" ,O, " => [b"\"", b"O"]);
}
|
256 | ||||
45 | #[test] |
|
257 | #[test] | |
46 | fn test_parse_byte_size() { |
|
258 | fn test_parse_byte_size() { | |
47 | assert_eq!(parse_byte_size(b""), None); |
|
259 | assert_eq!(parse_byte_size(b""), None); |
General Comments 0
You need to be logged in to leave comments.
Login now