##// END OF EJS Templates
rhg: Port Python’s `ui.configlist` as `Config::get_list`...
Simon Sapin -
r48762:6961eca0 default
parent child Browse files
Show More
@@ -0,0 +1,52 b''
1 """
2 List-valued configuration keys have an ad-hoc microsyntax. From `hg help config`:
3
4 > List values are separated by whitespace or comma, except when values are
5 > placed in double quotation marks:
6 >
7 > allow_read = "John Doe, PhD", brian, betty
8 >
9 > Quotation marks can be escaped by prefixing them with a backslash. Only
10 > quotation marks at the beginning of a word is counted as a quotation
11 > (e.g., ``foo"bar baz`` is the list of ``foo"bar`` and ``baz``).
12
13 That help documentation is fairly light on details, the actual parser has many
14 other edge cases. This test tries to cover them.
15 """
16
17 from mercurial.utils import stringutil
18
19
def assert_parselist(input, expected):
    """Check that ``stringutil.parselist(input)`` returns ``expected``.

    Raises ``AssertionError`` with the input, the actual result and the
    expected result when the two differ.
    """
    result = stringutil.parselist(input)
    if result == expected:
        return
    message = "parse_input(%r)\n got %r\nexpected %r" % (
        input,
        result,
        expected,
    )
    raise AssertionError(message)
27
28
29 # Keep these Python tests in sync with the Rust ones in `rust/hg-core/src/config/values.rs`
30
# Each entry is `(raw config value, expected parsed list)`.
_PARSELIST_CASES = [
    (b'', []),
    (b',', []),
    (b'A', [b'A']),
    (b'B,B', [b'B', b'B']),
    (b', C, ,C,', [b'C', b'C']),
    (b'"', [b'"']),
    (b'""', [b'', b'']),
    (b'D,"', [b'D', b'"']),
    (b'E,""', [b'E', b'', b'']),
    (b'"F,F"', [b'F,F']),
    (b'"G,G', [b'"G', b'G']),
    (b'"H \\",\\"H', [b'"H', b',', b'H']),
    (b'I,I"', [b'I', b'I"']),
    (b'J,"J', [b'J', b'"J']),
    (b'K K', [b'K', b'K']),
    (b'"K" K', [b'K', b'K']),
    (b'L\tL', [b'L', b'L']),
    (b'"L"\tL', [b'L', b'', b'L']),
    (b'M\x0bM', [b'M', b'M']),
    (b'"M"\x0bM', [b'M', b'', b'M']),
    (b'"N" , ,"', [b'N"']),
    (b'" ,O, ', [b'"', b'O']),
]

for _raw, _expected in _PARSELIST_CASES:
    assert_parselist(_raw, _expected)
@@ -1,486 +1,496 b''
1 1 // config.rs
2 2 //
3 3 // Copyright 2020
4 4 // Valentin Gatien-Baron,
5 5 // Raphaël Gomès <rgomes@octobus.net>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 use super::layer;
11 11 use super::values;
12 12 use crate::config::layer::{
13 13 ConfigError, ConfigLayer, ConfigOrigin, ConfigValue,
14 14 };
15 15 use crate::utils::files::get_bytes_from_os_str;
16 16 use crate::utils::SliceExt;
17 17 use format_bytes::{write_bytes, DisplayBytes};
18 18 use std::collections::HashSet;
19 19 use std::env;
20 20 use std::fmt;
21 21 use std::path::{Path, PathBuf};
22 22 use std::str;
23 23
24 24 use crate::errors::{HgResultExt, IoResultExt};
25 25
26 26 /// Holds the config values for the current repository
27 27 /// TODO update this docstring once we support more sources
28 28 #[derive(Clone)]
29 29 pub struct Config {
30 30 layers: Vec<layer::ConfigLayer>,
31 31 }
32 32
33 33 impl DisplayBytes for Config {
34 34 fn display_bytes(
35 35 &self,
36 36 out: &mut dyn std::io::Write,
37 37 ) -> std::io::Result<()> {
38 38 for (index, layer) in self.layers.iter().rev().enumerate() {
39 39 write_bytes!(
40 40 out,
41 41 b"==== Layer {} (trusted: {}) ====\n{}",
42 42 index,
43 43 if layer.trusted {
44 44 &b"yes"[..]
45 45 } else {
46 46 &b"no"[..]
47 47 },
48 48 layer
49 49 )?;
50 50 }
51 51 Ok(())
52 52 }
53 53 }
54 54
55 55 pub enum ConfigSource {
56 56 /// Absolute path to a config file
57 57 AbsPath(PathBuf),
58 58 /// Already parsed (from the CLI, env, Python resources, etc.)
59 59 Parsed(layer::ConfigLayer),
60 60 }
61 61
62 62 #[derive(Debug)]
63 63 pub struct ConfigValueParseError {
64 64 pub origin: ConfigOrigin,
65 65 pub line: Option<usize>,
66 66 pub section: Vec<u8>,
67 67 pub item: Vec<u8>,
68 68 pub value: Vec<u8>,
69 69 pub expected_type: &'static str,
70 70 }
71 71
72 72 impl fmt::Display for ConfigValueParseError {
73 73 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74 74 // TODO: add origin and line number information, here and in
75 75 // corresponding python code
76 76 write!(
77 77 f,
78 78 "config error: {}.{} is not a {} ('{}')",
79 79 String::from_utf8_lossy(&self.section),
80 80 String::from_utf8_lossy(&self.item),
81 81 self.expected_type,
82 82 String::from_utf8_lossy(&self.value)
83 83 )
84 84 }
85 85 }
86 86
87 87 impl Config {
88 88 /// Load system and user configuration from various files.
89 89 ///
90 90 /// This is also affected by some environment variables.
91 91 pub fn load_non_repo() -> Result<Self, ConfigError> {
92 92 let mut config = Self { layers: Vec::new() };
93 93 let opt_rc_path = env::var_os("HGRCPATH");
94 94 // HGRCPATH replaces system config
95 95 if opt_rc_path.is_none() {
96 96 config.add_system_config()?
97 97 }
98 98
99 99 config.add_for_environment_variable("EDITOR", b"ui", b"editor");
100 100 config.add_for_environment_variable("VISUAL", b"ui", b"editor");
101 101 config.add_for_environment_variable("PAGER", b"pager", b"pager");
102 102
103 103 // These are set by `run-tests.py --rhg` to enable fallback for the
104 104 // entire test suite. Alternatives would be setting configuration
105 105 // through `$HGRCPATH` but some tests override that, or changing the
106 106 // `hg` shell alias to include `--config` but that disrupts tests that
107 107 // print command lines and check expected output.
108 108 config.add_for_environment_variable(
109 109 "RHG_ON_UNSUPPORTED",
110 110 b"rhg",
111 111 b"on-unsupported",
112 112 );
113 113 config.add_for_environment_variable(
114 114 "RHG_FALLBACK_EXECUTABLE",
115 115 b"rhg",
116 116 b"fallback-executable",
117 117 );
118 118
119 119 // HGRCPATH replaces user config
120 120 if opt_rc_path.is_none() {
121 121 config.add_user_config()?
122 122 }
123 123 if let Some(rc_path) = &opt_rc_path {
124 124 for path in env::split_paths(rc_path) {
125 125 if !path.as_os_str().is_empty() {
126 126 if path.is_dir() {
127 127 config.add_trusted_dir(&path)?
128 128 } else {
129 129 config.add_trusted_file(&path)?
130 130 }
131 131 }
132 132 }
133 133 }
134 134 Ok(config)
135 135 }
136 136
137 137 pub fn load_cli_args_config(
138 138 &mut self,
139 139 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
140 140 ) -> Result<(), ConfigError> {
141 141 if let Some(layer) = ConfigLayer::parse_cli_args(cli_config_args)? {
142 142 self.layers.push(layer)
143 143 }
144 144 Ok(())
145 145 }
146 146
147 147 fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> {
148 148 if let Some(entries) = std::fs::read_dir(path)
149 149 .when_reading_file(path)
150 150 .io_not_found_as_none()?
151 151 {
152 152 let mut file_paths = entries
153 153 .map(|result| {
154 154 result.when_reading_file(path).map(|entry| entry.path())
155 155 })
156 156 .collect::<Result<Vec<_>, _>>()?;
157 157 file_paths.sort();
158 158 for file_path in &file_paths {
159 159 if file_path.extension() == Some(std::ffi::OsStr::new("rc")) {
160 160 self.add_trusted_file(&file_path)?
161 161 }
162 162 }
163 163 }
164 164 Ok(())
165 165 }
166 166
167 167 fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> {
168 168 if let Some(data) = std::fs::read(path)
169 169 .when_reading_file(path)
170 170 .io_not_found_as_none()?
171 171 {
172 172 self.layers.extend(ConfigLayer::parse(path, &data)?)
173 173 }
174 174 Ok(())
175 175 }
176 176
177 177 fn add_for_environment_variable(
178 178 &mut self,
179 179 var: &str,
180 180 section: &[u8],
181 181 key: &[u8],
182 182 ) {
183 183 if let Some(value) = env::var_os(var) {
184 184 let origin = layer::ConfigOrigin::Environment(var.into());
185 185 let mut layer = ConfigLayer::new(origin);
186 186 layer.add(
187 187 section.to_owned(),
188 188 key.to_owned(),
189 189 get_bytes_from_os_str(value),
190 190 None,
191 191 );
192 192 self.layers.push(layer)
193 193 }
194 194 }
195 195
196 196 #[cfg(unix)] // TODO: other platforms
197 197 fn add_system_config(&mut self) -> Result<(), ConfigError> {
198 198 let mut add_for_prefix = |prefix: &Path| -> Result<(), ConfigError> {
199 199 let etc = prefix.join("etc").join("mercurial");
200 200 self.add_trusted_file(&etc.join("hgrc"))?;
201 201 self.add_trusted_dir(&etc.join("hgrc.d"))
202 202 };
203 203 let root = Path::new("/");
204 204 // TODO: use `std::env::args_os().next().unwrap()` a.k.a. argv[0]
205 205 // instead? TODO: can this be a relative path?
206 206 let hg = crate::utils::current_exe()?;
207 207 // TODO: this order (per-installation then per-system) matches
208 208 // `systemrcpath()` in `mercurial/scmposix.py`, but
209 209 // `mercurial/helptext/config.txt` suggests it should be reversed
210 210 if let Some(installation_prefix) = hg.parent().and_then(Path::parent) {
211 211 if installation_prefix != root {
212 212 add_for_prefix(&installation_prefix)?
213 213 }
214 214 }
215 215 add_for_prefix(root)?;
216 216 Ok(())
217 217 }
218 218
219 219 #[cfg(unix)] // TODO: other plateforms
220 220 fn add_user_config(&mut self) -> Result<(), ConfigError> {
221 221 let opt_home = home::home_dir();
222 222 if let Some(home) = &opt_home {
223 223 self.add_trusted_file(&home.join(".hgrc"))?
224 224 }
225 225 let darwin = cfg!(any(target_os = "macos", target_os = "ios"));
226 226 if !darwin {
227 227 if let Some(config_home) = env::var_os("XDG_CONFIG_HOME")
228 228 .map(PathBuf::from)
229 229 .or_else(|| opt_home.map(|home| home.join(".config")))
230 230 {
231 231 self.add_trusted_file(&config_home.join("hg").join("hgrc"))?
232 232 }
233 233 }
234 234 Ok(())
235 235 }
236 236
237 237 /// Loads in order, which means that the precedence is the same
238 238 /// as the order of `sources`.
239 239 pub fn load_from_explicit_sources(
240 240 sources: Vec<ConfigSource>,
241 241 ) -> Result<Self, ConfigError> {
242 242 let mut layers = vec![];
243 243
244 244 for source in sources.into_iter() {
245 245 match source {
246 246 ConfigSource::Parsed(c) => layers.push(c),
247 247 ConfigSource::AbsPath(c) => {
248 248 // TODO check if it should be trusted
249 249 // mercurial/ui.py:427
250 250 let data = match std::fs::read(&c) {
251 251 Err(_) => continue, // same as the python code
252 252 Ok(data) => data,
253 253 };
254 254 layers.extend(ConfigLayer::parse(&c, &data)?)
255 255 }
256 256 }
257 257 }
258 258
259 259 Ok(Config { layers })
260 260 }
261 261
262 262 /// Loads the per-repository config into a new `Config` which is combined
263 263 /// with `self`.
264 264 pub(crate) fn combine_with_repo(
265 265 &self,
266 266 repo_config_files: &[PathBuf],
267 267 ) -> Result<Self, ConfigError> {
268 268 let (cli_layers, other_layers) = self
269 269 .layers
270 270 .iter()
271 271 .cloned()
272 272 .partition(ConfigLayer::is_from_command_line);
273 273
274 274 let mut repo_config = Self {
275 275 layers: other_layers,
276 276 };
277 277 for path in repo_config_files {
278 278 // TODO: check if this file should be trusted:
279 279 // `mercurial/ui.py:427`
280 280 repo_config.add_trusted_file(path)?;
281 281 }
282 282 repo_config.layers.extend(cli_layers);
283 283 Ok(repo_config)
284 284 }
285 285
286 286 fn get_parse<'config, T: 'config>(
287 287 &'config self,
288 288 section: &[u8],
289 289 item: &[u8],
290 290 expected_type: &'static str,
291 291 parse: impl Fn(&'config [u8]) -> Option<T>,
292 292 ) -> Result<Option<T>, ConfigValueParseError> {
293 293 match self.get_inner(&section, &item) {
294 294 Some((layer, v)) => match parse(&v.bytes) {
295 295 Some(b) => Ok(Some(b)),
296 296 None => Err(ConfigValueParseError {
297 297 origin: layer.origin.to_owned(),
298 298 line: v.line,
299 299 value: v.bytes.to_owned(),
300 300 section: section.to_owned(),
301 301 item: item.to_owned(),
302 302 expected_type,
303 303 }),
304 304 },
305 305 None => Ok(None),
306 306 }
307 307 }
308 308
309 309 /// Returns an `Err` if the first value found is not a valid UTF-8 string.
310 310 /// Otherwise, returns an `Ok(value)` if found, or `None`.
311 311 pub fn get_str(
312 312 &self,
313 313 section: &[u8],
314 314 item: &[u8],
315 315 ) -> Result<Option<&str>, ConfigValueParseError> {
316 316 self.get_parse(section, item, "ASCII or UTF-8 string", |value| {
317 317 str::from_utf8(value).ok()
318 318 })
319 319 }
320 320
321 321 /// Returns an `Err` if the first value found is not a valid unsigned
322 322 /// integer. Otherwise, returns an `Ok(value)` if found, or `None`.
323 323 pub fn get_u32(
324 324 &self,
325 325 section: &[u8],
326 326 item: &[u8],
327 327 ) -> Result<Option<u32>, ConfigValueParseError> {
328 328 self.get_parse(section, item, "valid integer", |value| {
329 329 str::from_utf8(value).ok()?.parse().ok()
330 330 })
331 331 }
332 332
333 333 /// Returns an `Err` if the first value found is not a valid file size
334 334 /// value such as `30` (default unit is bytes), `7 MB`, or `42.5 kb`.
335 335 /// Otherwise, returns an `Ok(value_in_bytes)` if found, or `None`.
336 336 pub fn get_byte_size(
337 337 &self,
338 338 section: &[u8],
339 339 item: &[u8],
340 340 ) -> Result<Option<u64>, ConfigValueParseError> {
341 341 self.get_parse(section, item, "byte quantity", values::parse_byte_size)
342 342 }
343 343
344 344 /// Returns an `Err` if the first value found is not a valid boolean.
345 345 /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if
346 346 /// found, or `None`.
347 347 pub fn get_option(
348 348 &self,
349 349 section: &[u8],
350 350 item: &[u8],
351 351 ) -> Result<Option<bool>, ConfigValueParseError> {
352 352 self.get_parse(section, item, "boolean", values::parse_bool)
353 353 }
354 354
355 355 /// Returns the corresponding boolean in the config. Returns `Ok(false)`
356 356 /// if the value is not found, an `Err` if it's not a valid boolean.
357 357 pub fn get_bool(
358 358 &self,
359 359 section: &[u8],
360 360 item: &[u8],
361 361 ) -> Result<bool, ConfigValueParseError> {
362 362 Ok(self.get_option(section, item)?.unwrap_or(false))
363 363 }
364 364
365 365 /// Returns the corresponding list-value in the config if found, or `None`.
366 366 ///
367 367 /// This is appropriate for new configuration keys. The value syntax is
368 368 /// **not** the same as most existing list-valued config, which has Python
369 369 /// parsing implemented in `parselist()` in
370 370 /// `mercurial/utils/stringutil.py`. Faithfully porting that parsing
371 371 /// algorithm to Rust (including behavior that are arguably bugs)
372 372 /// turned out to be non-trivial and hasn’t been completed as of this
373 373 /// writing.
374 374 ///
375 375 /// Instead, the "simple" syntax is: split on comma, then trim leading and
376 376 /// trailing whitespace of each component. Quotes or backslashes are not
377 377 /// interpreted in any way. Commas are mandatory between values. Values
378 378 /// that contain a comma are not supported.
379 379 pub fn get_simple_list(
380 380 &self,
381 381 section: &[u8],
382 382 item: &[u8],
383 383 ) -> Option<impl Iterator<Item = &[u8]>> {
384 384 self.get(section, item).map(|value| {
385 385 value
386 386 .split(|&byte| byte == b',')
387 387 .map(|component| component.trim())
388 388 })
389 389 }
390 390
391 /// If there is an `item` value in `section`, parse and return a list of
392 /// byte strings.
393 pub fn get_list(
394 &self,
395 section: &[u8],
396 item: &[u8],
397 ) -> Option<Vec<Vec<u8>>> {
398 self.get(section, item).map(values::parse_list)
399 }
400
391 401 /// Returns the raw value bytes of the first one found, or `None`.
392 402 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
393 403 self.get_inner(section, item)
394 404 .map(|(_, value)| value.bytes.as_ref())
395 405 }
396 406
397 407 /// Returns the layer and the value of the first one found, or `None`.
398 408 fn get_inner(
399 409 &self,
400 410 section: &[u8],
401 411 item: &[u8],
402 412 ) -> Option<(&ConfigLayer, &ConfigValue)> {
403 413 for layer in self.layers.iter().rev() {
404 414 if !layer.trusted {
405 415 continue;
406 416 }
407 417 if let Some(v) = layer.get(&section, &item) {
408 418 return Some((&layer, v));
409 419 }
410 420 }
411 421 None
412 422 }
413 423
414 424 /// Return all keys defined for the given section
415 425 pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> {
416 426 self.layers
417 427 .iter()
418 428 .flat_map(|layer| layer.iter_keys(section))
419 429 .collect()
420 430 }
421 431
422 432 /// Get raw values bytes from all layers (even untrusted ones) in order
423 433 /// of precedence.
424 434 #[cfg(test)]
425 435 fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
426 436 let mut res = vec![];
427 437 for layer in self.layers.iter().rev() {
428 438 if let Some(v) = layer.get(&section, &item) {
429 439 res.push(v.bytes.as_ref());
430 440 }
431 441 }
432 442 res
433 443 }
434 444 }
435 445
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::fs::File;
    use std::io::Write;

    /// Values of a file pulled in with `%include` rank between the values
    /// set before and after the directive in the including file.
    #[test]
    fn test_include_layer_ordering() {
        let tmpdir = tempfile::tempdir().unwrap();
        let dir = tmpdir.path();

        let mut included = File::create(&dir.join("included.rc")).unwrap();
        included.write_all(b"[section]\nitem=value1").unwrap();

        let base_path = dir.join("base.rc");
        let mut base = File::create(&base_path).unwrap();
        base.write_all(
            b"[section]\nitem=value0\n%include included.rc\nitem=value2\n\
              [section2]\ncount = 4\nsize = 1.5 KB\nnot-count = 1.5\nnot-size = 1 ub",
        )
        .unwrap();

        let config = Config::load_from_explicit_sources(vec![
            ConfigSource::AbsPath(base_path),
        ])
        .expect("expected valid config");

        // The value set after the `%include` line wins
        let (_, value) = config.get_inner(b"section", b"item").unwrap();
        assert_eq!(
            value,
            &ConfigValue {
                bytes: b"value2".to_vec(),
                line: Some(4)
            }
        );

        let value = config.get(b"section", b"item").unwrap();
        assert_eq!(value, b"value2",);
        assert_eq!(
            config.get_all(b"section", b"item"),
            [b"value2", b"value1", b"value0"]
        );

        // Typed accessors: valid values
        assert_eq!(config.get_u32(b"section2", b"count").unwrap(), Some(4));
        assert_eq!(
            config.get_byte_size(b"section2", b"size").unwrap(),
            Some(1024 + 512)
        );
        // Typed accessors: invalid values
        assert!(config.get_u32(b"section2", b"not-count").is_err());
        assert!(config.get_byte_size(b"section2", b"not-size").is_err());
    }
}
@@ -1,61 +1,273 b''
1 1 //! Parsing functions for various type of configuration values.
2 2 //!
3 3 //! Returning `None` indicates a syntax error. Using a `Result` would be more
4 4 //! correct but would take more boilerplate for converting between error types,
5 5 //! compared to using `.ok()` on inner results of various error types to
6 6 //! convert them all to options. The `Config::get_parse` method later converts
7 7 //! those options to results with `ConfigValueParseError`, which contains
8 8 //! details about where the value came from (but omits details of what’s
9 9 //! invalid inside the value).
10 10
11 use crate::utils::SliceExt;
12
11 13 pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {
12 14 match v.to_ascii_lowercase().as_slice() {
13 15 b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true),
14 16 b"0" | b"no" | b"false" | b"off" | b"never" => Some(false),
15 17 _ => None,
16 18 }
17 19 }
18 20
19 21 pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> {
20 22 let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase();
21 23 const UNITS: &[(&str, u64)] = &[
22 24 ("g", 1 << 30),
23 25 ("gb", 1 << 30),
24 26 ("m", 1 << 20),
25 27 ("mb", 1 << 20),
26 28 ("k", 1 << 10),
27 29 ("kb", 1 << 10),
28 30 ("b", 1 << 0), // Needs to be last
29 31 ];
30 32 for &(unit, multiplier) in UNITS {
31 33 // TODO: use `value.strip_suffix(unit)` when we require Rust 1.45+
32 34 if value.ends_with(unit) {
33 35 let value_before_unit = &value[..value.len() - unit.len()];
34 36 let float: f64 = value_before_unit.trim().parse().ok()?;
35 37 if float >= 0.0 {
36 38 return Some((float * multiplier as f64).round() as u64);
37 39 } else {
38 40 return None;
39 41 }
40 42 }
41 43 }
42 44 value.parse().ok()
43 45 }
44 46
47 /// Parse a config value as a list of sub-values.
48 ///
49 /// Ported from `parselist` in `mercurial/utils/stringutil.py`
50
51 // Note: keep behavior in sync with the Python one.
52
53 // Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when
54 // possible (when there’s no backslash-escapes) but this is probably not worth
55 // the complexity as config is presumably not accessed inside
56 // preformance-sensitive loops.
57 pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {
58 // Port of Python’s `value.lstrip(b' ,\n')`
59 // TODO: is this really what we want?
60 let input =
61 input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n');
62 parse_list_without_trim_start(input)
63 }
64
/// Port of Python’s `_configlist` and the `_parse_plain` / `_parse_quote`
/// state functions it drives.
///
/// Trailing spaces and commas of `input` are ignored. The rest is split on
/// whitespace and commas, except inside a double-quoted value that starts a
/// word. An opening quote without a matching closing quote is kept as a
/// literal byte and what follows it is parsed again as unquoted text.
fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {
    // https://docs.python.org/3/library/stdtypes.html?#bytes.isspace
    fn is_space(byte: u8) -> bool {
        match byte {
            b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c' => true,
            _ => false,
        }
    }

    fn is_space_or_comma(byte: u8) -> bool {
        byte == b',' || is_space(byte)
    }

    /// Index of the first byte at or after `from` that `skip` rejects, or
    /// `input.len()` if there is none.
    fn skip_while(
        input: &[u8],
        from: usize,
        skip: impl Fn(u8) -> bool,
    ) -> usize {
        input
            .iter()
            .skip(from)
            .position(|&byte| !skip(byte))
            .map_or(input.len(), |skipped| from + skipped)
    }

    // Python’s `parser`; setting it to `None` is instead handled by
    // returning immediately.
    enum Mode {
        Plain,
        Quoted,
    }

    // Start of port of Python’s `_configlist`: `s.rstrip(b' ,')`
    let kept = input
        .iter()
        .rposition(|&byte| byte != b' ' && byte != b',')
        .map_or(0, |last| last + 1);
    let input = &input[..kept];
    if input.is_empty() {
        return Vec::new();
    }

    // These correspond to Python’s…
    let mut mode = Mode::Plain; // `parser`
    let mut done: Vec<Vec<u8>> = Vec::new(); // `parts[:-1]`
    let mut current: Vec<u8> = Vec::new(); // `parts[-1]`
    let mut pos = 0; // `offset`

    loop {
        match mode {
            // Port of Python’s `_parse_plain`: handles one byte of an
            // unquoted value, after skipping any separators before it.
            Mode::Plain => {
                let next = skip_while(input, pos, is_space_or_comma);
                let skipped_separator = next > pos;
                pos = next;

                let byte = match input.get(pos) {
                    Some(&byte) => byte,
                    None => {
                        done.push(current);
                        return done;
                    }
                };
                if skipped_separator {
                    done.push(std::mem::take(&mut current));
                }
                pos += 1;

                if byte == b'"' {
                    if current.is_empty() {
                        // A quote only opens a quoted value at the start
                        // of a word
                        mode = Mode::Quoted;
                        continue;
                    }
                    if current.ends_with(b"\\") {
                        // `\"` inside a word: drop the backslash
                        current.pop();
                    }
                }
                current.push(byte);
            }

            // Port of Python’s `_parse_quote`: `pos` is just after an
            // opening double quote.
            Mode::Quoted => {
                if input.get(pos) == Some(&b'"') {
                    // The input contains a quoted zero-length value `""`
                    debug_assert_eq!(current, b"");
                    done.push(std::mem::take(&mut current));
                    pos = skip_while(input, pos + 1, is_space_or_comma);
                    mode = Mode::Plain;
                    continue;
                }

                // Copy bytes up to the next unescaped double quote,
                // turning `\"` into `"`.
                while let Some(&byte) = input.get(pos) {
                    match byte {
                        b'"' => break,
                        b'\\' if input.get(pos + 1) == Some(&b'"') => {
                            current.push(b'"');
                            pos += 2;
                        }
                        _ => {
                            current.push(byte);
                            pos += 1;
                        }
                    }
                }

                if pos >= input.len() {
                    // We didn’t find a closing double-quote, so treat the
                    // opening one as part of an unquoted value instead of
                    // delimiting the start of a quoted value.

                    // `current` may have had some backslash-escapes
                    // unescaped. TODO: shouldn’t we use a slice of `input`
                    // instead?
                    let mut reparsed =
                        parse_list_without_trim_start(&current);
                    if let Some(first) = reparsed.first_mut() {
                        first.insert(0, b'"');
                        // `current` is dropped in favor of `reparsed`
                        done.extend(reparsed)
                    } else {
                        current.push(b'"');
                        done.push(current);
                    }
                    return done;
                }

                // We’re not at the end of the input, which means the `while`
                // loop above ended at a double quote. Skip over that, then
                // over spaces and commas (as in Python, other whitespace is
                // not skipped here).
                pos = skip_while(input, pos + 1, |byte| {
                    byte == b' ' || byte == b','
                });

                if pos >= input.len() {
                    done.push(current);
                    return done;
                }

                if pos + 1 == input.len() && input[pos] == b'"' {
                    // A lone double quote at the very end is appended to
                    // the value that was just closed
                    current.push(b'"');
                    pos += 1;
                } else {
                    done.push(std::mem::take(&mut current));
                }

                mode = Mode::Plain;
            }
        }
    }
}
212
#[test]
fn test_parse_list() {
    // Compare as strings to make `assert_eq` error messages nicer
    fn as_text(bytes: &[u8]) -> String {
        std::str::from_utf8(bytes).unwrap().to_owned()
    }

    fn check(input: &[u8], expected: &[&str]) {
        let actual: Vec<String> =
            parse_list(input).iter().map(|v| as_text(v)).collect();
        let expected: Vec<String> =
            expected.iter().map(|&v| v.to_owned()).collect();
        assert_eq!(actual, expected);
    }

    // Keep these Rust tests in sync with the Python ones in
    // `tests/test-config-parselist.py`
    check(b"", &[]);
    check(b",", &[]);
    check(b"A", &["A"]);
    check(b"B,B", &["B", "B"]);
    check(b", C, ,C,", &["C", "C"]);
    check(b"\"", &["\""]);
    check(b"\"\"", &["", ""]);
    check(b"D,\"", &["D", "\""]);
    check(b"E,\"\"", &["E", "", ""]);
    check(b"\"F,F\"", &["F,F"]);
    check(b"\"G,G", &["\"G", "G"]);
    check(b"\"H \\\",\\\"H", &["\"H", ",", "H"]);
    check(b"I,I\"", &["I", "I\""]);
    check(b"J,\"J", &["J", "\"J"]);
    check(b"K K", &["K", "K"]);
    check(b"\"K\" K", &["K", "K"]);
    check(b"L\tL", &["L", "L"]);
    check(b"\"L\"\tL", &["L", "", "L"]);
    check(b"M\x0bM", &["M", "M"]);
    check(b"\"M\"\x0bM", &["M", "", "M"]);
    check(b"\"N\" , ,\"", &["N\""]);
    check(b"\" ,O, ", &["\"", "O"]);
}
256
#[test]
fn test_parse_byte_size() {
    // `(input, expected result)`
    let cases: &[(&str, Option<u64>)] = &[
        // Missing number
        ("", None),
        ("b", None),
        // With and without units, whitespace and fractions
        ("12", Some(12)),
        ("12b", Some(12)),
        ("12 b", Some(12)),
        ("12.1 b", Some(12)),
        ("1.1 K", Some(1126)),
        ("1.1 kB", Some(1126)),
        // Negative quantities are rejected, small ones round to zero
        ("-12 b", None),
        ("-0.1 b", None),
        ("0.1 b", Some(0)),
    ];
    for &(input, expected) in cases {
        assert_eq!(parse_byte_size(input.as_bytes()), expected);
    }
}
General Comments 0
You need to be logged in to leave comments. Login now