Show More
@@ -0,0 +1,52 b'' | |||
|
1 | """ | |
|
2 | List-valued configuration keys have an ad-hoc microsyntax. From `hg help config`: | |
|
3 | ||
|
4 | > List values are separated by whitespace or comma, except when values are | |
|
5 | > placed in double quotation marks: | |
|
6 | > | |
|
7 | > allow_read = "John Doe, PhD", brian, betty | |
|
8 | > | |
|
9 | > Quotation marks can be escaped by prefixing them with a backslash. Only | |
|
10 | > quotation marks at the beginning of a word is counted as a quotation | |
|
11 | > (e.g., ``foo"bar baz`` is the list of ``foo"bar`` and ``baz``). | |
|
12 | ||
|
13 | That help documentation is fairly light on details, the actual parser has many | |
|
14 | other edge cases. This test tries to cover them. | |
|
15 | """ | |
|
16 | ||
|
17 | from mercurial.utils import stringutil | |
|
18 | ||
|
19 | ||
|
def assert_parselist(input, expected):
    """Check that `stringutil.parselist(input)` returns `expected`.

    Raises an `AssertionError` showing the input, the actual result and the
    expected result when the two differ.
    """
    result = stringutil.parselist(input)
    if result != expected:
        # Name the function that is actually under test in the message.
        raise AssertionError(
            "parselist(%r)\n got %r\nexpected %r"
            % (input, result, expected)
        )
|
27 | ||
|
28 | ||
|
29 | # Keep these Python tests in sync with the Rust ones in `rust/hg-core/src/config/values.rs` | |
|
30 | ||
|
31 | assert_parselist(b'', []) | |
|
32 | assert_parselist(b',', []) | |
|
33 | assert_parselist(b'A', [b'A']) | |
|
34 | assert_parselist(b'B,B', [b'B', b'B']) | |
|
35 | assert_parselist(b', C, ,C,', [b'C', b'C']) | |
|
36 | assert_parselist(b'"', [b'"']) | |
|
37 | assert_parselist(b'""', [b'', b'']) | |
|
38 | assert_parselist(b'D,"', [b'D', b'"']) | |
|
39 | assert_parselist(b'E,""', [b'E', b'', b'']) | |
|
40 | assert_parselist(b'"F,F"', [b'F,F']) | |
|
41 | assert_parselist(b'"G,G', [b'"G', b'G']) | |
|
42 | assert_parselist(b'"H \\",\\"H', [b'"H', b',', b'H']) | |
|
43 | assert_parselist(b'I,I"', [b'I', b'I"']) | |
|
44 | assert_parselist(b'J,"J', [b'J', b'"J']) | |
|
45 | assert_parselist(b'K K', [b'K', b'K']) | |
|
46 | assert_parselist(b'"K" K', [b'K', b'K']) | |
|
47 | assert_parselist(b'L\tL', [b'L', b'L']) | |
|
48 | assert_parselist(b'"L"\tL', [b'L', b'', b'L']) | |
|
49 | assert_parselist(b'M\x0bM', [b'M', b'M']) | |
|
50 | assert_parselist(b'"M"\x0bM', [b'M', b'', b'M']) | |
|
51 | assert_parselist(b'"N" , ,"', [b'N"']) | |
|
52 | assert_parselist(b'" ,O, ', [b'"', b'O']) |
@@ -1,486 +1,496 b'' | |||
|
1 | 1 | // config.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2020 |
|
4 | 4 | // Valentin Gatien-Baron, |
|
5 | 5 | // Raphaël Gomès <rgomes@octobus.net> |
|
6 | 6 | // |
|
7 | 7 | // This software may be used and distributed according to the terms of the |
|
8 | 8 | // GNU General Public License version 2 or any later version. |
|
9 | 9 | |
|
10 | 10 | use super::layer; |
|
11 | 11 | use super::values; |
|
12 | 12 | use crate::config::layer::{ |
|
13 | 13 | ConfigError, ConfigLayer, ConfigOrigin, ConfigValue, |
|
14 | 14 | }; |
|
15 | 15 | use crate::utils::files::get_bytes_from_os_str; |
|
16 | 16 | use crate::utils::SliceExt; |
|
17 | 17 | use format_bytes::{write_bytes, DisplayBytes}; |
|
18 | 18 | use std::collections::HashSet; |
|
19 | 19 | use std::env; |
|
20 | 20 | use std::fmt; |
|
21 | 21 | use std::path::{Path, PathBuf}; |
|
22 | 22 | use std::str; |
|
23 | 23 | |
|
24 | 24 | use crate::errors::{HgResultExt, IoResultExt}; |
|
25 | 25 | |
|
26 | 26 | /// Holds the config values for the current repository |
|
27 | 27 | /// TODO update this docstring once we support more sources |
|
28 | 28 | #[derive(Clone)] |
|
29 | 29 | pub struct Config { |
|
30 | 30 | layers: Vec<layer::ConfigLayer>, |
|
31 | 31 | } |
|
32 | 32 | |
|
33 | 33 | impl DisplayBytes for Config { |
|
34 | 34 | fn display_bytes( |
|
35 | 35 | &self, |
|
36 | 36 | out: &mut dyn std::io::Write, |
|
37 | 37 | ) -> std::io::Result<()> { |
|
38 | 38 | for (index, layer) in self.layers.iter().rev().enumerate() { |
|
39 | 39 | write_bytes!( |
|
40 | 40 | out, |
|
41 | 41 | b"==== Layer {} (trusted: {}) ====\n{}", |
|
42 | 42 | index, |
|
43 | 43 | if layer.trusted { |
|
44 | 44 | &b"yes"[..] |
|
45 | 45 | } else { |
|
46 | 46 | &b"no"[..] |
|
47 | 47 | }, |
|
48 | 48 | layer |
|
49 | 49 | )?; |
|
50 | 50 | } |
|
51 | 51 | Ok(()) |
|
52 | 52 | } |
|
53 | 53 | } |
|
54 | 54 | |
|
55 | 55 | pub enum ConfigSource { |
|
56 | 56 | /// Absolute path to a config file |
|
57 | 57 | AbsPath(PathBuf), |
|
58 | 58 | /// Already parsed (from the CLI, env, Python resources, etc.) |
|
59 | 59 | Parsed(layer::ConfigLayer), |
|
60 | 60 | } |
|
61 | 61 | |
|
62 | 62 | #[derive(Debug)] |
|
63 | 63 | pub struct ConfigValueParseError { |
|
64 | 64 | pub origin: ConfigOrigin, |
|
65 | 65 | pub line: Option<usize>, |
|
66 | 66 | pub section: Vec<u8>, |
|
67 | 67 | pub item: Vec<u8>, |
|
68 | 68 | pub value: Vec<u8>, |
|
69 | 69 | pub expected_type: &'static str, |
|
70 | 70 | } |
|
71 | 71 | |
|
72 | 72 | impl fmt::Display for ConfigValueParseError { |
|
73 | 73 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
|
74 | 74 | // TODO: add origin and line number information, here and in |
|
75 | 75 | // corresponding python code |
|
76 | 76 | write!( |
|
77 | 77 | f, |
|
78 | 78 | "config error: {}.{} is not a {} ('{}')", |
|
79 | 79 | String::from_utf8_lossy(&self.section), |
|
80 | 80 | String::from_utf8_lossy(&self.item), |
|
81 | 81 | self.expected_type, |
|
82 | 82 | String::from_utf8_lossy(&self.value) |
|
83 | 83 | ) |
|
84 | 84 | } |
|
85 | 85 | } |
|
86 | 86 | |
|
87 | 87 | impl Config { |
|
88 | 88 | /// Load system and user configuration from various files. |
|
89 | 89 | /// |
|
90 | 90 | /// This is also affected by some environment variables. |
|
91 | 91 | pub fn load_non_repo() -> Result<Self, ConfigError> { |
|
92 | 92 | let mut config = Self { layers: Vec::new() }; |
|
93 | 93 | let opt_rc_path = env::var_os("HGRCPATH"); |
|
94 | 94 | // HGRCPATH replaces system config |
|
95 | 95 | if opt_rc_path.is_none() { |
|
96 | 96 | config.add_system_config()? |
|
97 | 97 | } |
|
98 | 98 | |
|
99 | 99 | config.add_for_environment_variable("EDITOR", b"ui", b"editor"); |
|
100 | 100 | config.add_for_environment_variable("VISUAL", b"ui", b"editor"); |
|
101 | 101 | config.add_for_environment_variable("PAGER", b"pager", b"pager"); |
|
102 | 102 | |
|
103 | 103 | // These are set by `run-tests.py --rhg` to enable fallback for the |
|
104 | 104 | // entire test suite. Alternatives would be setting configuration |
|
105 | 105 | // through `$HGRCPATH` but some tests override that, or changing the |
|
106 | 106 | // `hg` shell alias to include `--config` but that disrupts tests that |
|
107 | 107 | // print command lines and check expected output. |
|
108 | 108 | config.add_for_environment_variable( |
|
109 | 109 | "RHG_ON_UNSUPPORTED", |
|
110 | 110 | b"rhg", |
|
111 | 111 | b"on-unsupported", |
|
112 | 112 | ); |
|
113 | 113 | config.add_for_environment_variable( |
|
114 | 114 | "RHG_FALLBACK_EXECUTABLE", |
|
115 | 115 | b"rhg", |
|
116 | 116 | b"fallback-executable", |
|
117 | 117 | ); |
|
118 | 118 | |
|
119 | 119 | // HGRCPATH replaces user config |
|
120 | 120 | if opt_rc_path.is_none() { |
|
121 | 121 | config.add_user_config()? |
|
122 | 122 | } |
|
123 | 123 | if let Some(rc_path) = &opt_rc_path { |
|
124 | 124 | for path in env::split_paths(rc_path) { |
|
125 | 125 | if !path.as_os_str().is_empty() { |
|
126 | 126 | if path.is_dir() { |
|
127 | 127 | config.add_trusted_dir(&path)? |
|
128 | 128 | } else { |
|
129 | 129 | config.add_trusted_file(&path)? |
|
130 | 130 | } |
|
131 | 131 | } |
|
132 | 132 | } |
|
133 | 133 | } |
|
134 | 134 | Ok(config) |
|
135 | 135 | } |
|
136 | 136 | |
|
137 | 137 | pub fn load_cli_args_config( |
|
138 | 138 | &mut self, |
|
139 | 139 | cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>, |
|
140 | 140 | ) -> Result<(), ConfigError> { |
|
141 | 141 | if let Some(layer) = ConfigLayer::parse_cli_args(cli_config_args)? { |
|
142 | 142 | self.layers.push(layer) |
|
143 | 143 | } |
|
144 | 144 | Ok(()) |
|
145 | 145 | } |
|
146 | 146 | |
|
147 | 147 | fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> { |
|
148 | 148 | if let Some(entries) = std::fs::read_dir(path) |
|
149 | 149 | .when_reading_file(path) |
|
150 | 150 | .io_not_found_as_none()? |
|
151 | 151 | { |
|
152 | 152 | let mut file_paths = entries |
|
153 | 153 | .map(|result| { |
|
154 | 154 | result.when_reading_file(path).map(|entry| entry.path()) |
|
155 | 155 | }) |
|
156 | 156 | .collect::<Result<Vec<_>, _>>()?; |
|
157 | 157 | file_paths.sort(); |
|
158 | 158 | for file_path in &file_paths { |
|
159 | 159 | if file_path.extension() == Some(std::ffi::OsStr::new("rc")) { |
|
160 | 160 | self.add_trusted_file(&file_path)? |
|
161 | 161 | } |
|
162 | 162 | } |
|
163 | 163 | } |
|
164 | 164 | Ok(()) |
|
165 | 165 | } |
|
166 | 166 | |
|
167 | 167 | fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> { |
|
168 | 168 | if let Some(data) = std::fs::read(path) |
|
169 | 169 | .when_reading_file(path) |
|
170 | 170 | .io_not_found_as_none()? |
|
171 | 171 | { |
|
172 | 172 | self.layers.extend(ConfigLayer::parse(path, &data)?) |
|
173 | 173 | } |
|
174 | 174 | Ok(()) |
|
175 | 175 | } |
|
176 | 176 | |
|
177 | 177 | fn add_for_environment_variable( |
|
178 | 178 | &mut self, |
|
179 | 179 | var: &str, |
|
180 | 180 | section: &[u8], |
|
181 | 181 | key: &[u8], |
|
182 | 182 | ) { |
|
183 | 183 | if let Some(value) = env::var_os(var) { |
|
184 | 184 | let origin = layer::ConfigOrigin::Environment(var.into()); |
|
185 | 185 | let mut layer = ConfigLayer::new(origin); |
|
186 | 186 | layer.add( |
|
187 | 187 | section.to_owned(), |
|
188 | 188 | key.to_owned(), |
|
189 | 189 | get_bytes_from_os_str(value), |
|
190 | 190 | None, |
|
191 | 191 | ); |
|
192 | 192 | self.layers.push(layer) |
|
193 | 193 | } |
|
194 | 194 | } |
|
195 | 195 | |
|
196 | 196 | #[cfg(unix)] // TODO: other platforms |
|
197 | 197 | fn add_system_config(&mut self) -> Result<(), ConfigError> { |
|
198 | 198 | let mut add_for_prefix = |prefix: &Path| -> Result<(), ConfigError> { |
|
199 | 199 | let etc = prefix.join("etc").join("mercurial"); |
|
200 | 200 | self.add_trusted_file(&etc.join("hgrc"))?; |
|
201 | 201 | self.add_trusted_dir(&etc.join("hgrc.d")) |
|
202 | 202 | }; |
|
203 | 203 | let root = Path::new("/"); |
|
204 | 204 | // TODO: use `std::env::args_os().next().unwrap()` a.k.a. argv[0] |
|
205 | 205 | // instead? TODO: can this be a relative path? |
|
206 | 206 | let hg = crate::utils::current_exe()?; |
|
207 | 207 | // TODO: this order (per-installation then per-system) matches |
|
208 | 208 | // `systemrcpath()` in `mercurial/scmposix.py`, but |
|
209 | 209 | // `mercurial/helptext/config.txt` suggests it should be reversed |
|
210 | 210 | if let Some(installation_prefix) = hg.parent().and_then(Path::parent) { |
|
211 | 211 | if installation_prefix != root { |
|
212 | 212 | add_for_prefix(&installation_prefix)? |
|
213 | 213 | } |
|
214 | 214 | } |
|
215 | 215 | add_for_prefix(root)?; |
|
216 | 216 | Ok(()) |
|
217 | 217 | } |
|
218 | 218 | |
|
 219 | 219 | #[cfg(unix)] // TODO: other platforms |
|
220 | 220 | fn add_user_config(&mut self) -> Result<(), ConfigError> { |
|
221 | 221 | let opt_home = home::home_dir(); |
|
222 | 222 | if let Some(home) = &opt_home { |
|
223 | 223 | self.add_trusted_file(&home.join(".hgrc"))? |
|
224 | 224 | } |
|
225 | 225 | let darwin = cfg!(any(target_os = "macos", target_os = "ios")); |
|
226 | 226 | if !darwin { |
|
227 | 227 | if let Some(config_home) = env::var_os("XDG_CONFIG_HOME") |
|
228 | 228 | .map(PathBuf::from) |
|
229 | 229 | .or_else(|| opt_home.map(|home| home.join(".config"))) |
|
230 | 230 | { |
|
231 | 231 | self.add_trusted_file(&config_home.join("hg").join("hgrc"))? |
|
232 | 232 | } |
|
233 | 233 | } |
|
234 | 234 | Ok(()) |
|
235 | 235 | } |
|
236 | 236 | |
|
237 | 237 | /// Loads in order, which means that the precedence is the same |
|
238 | 238 | /// as the order of `sources`. |
|
239 | 239 | pub fn load_from_explicit_sources( |
|
240 | 240 | sources: Vec<ConfigSource>, |
|
241 | 241 | ) -> Result<Self, ConfigError> { |
|
242 | 242 | let mut layers = vec![]; |
|
243 | 243 | |
|
244 | 244 | for source in sources.into_iter() { |
|
245 | 245 | match source { |
|
246 | 246 | ConfigSource::Parsed(c) => layers.push(c), |
|
247 | 247 | ConfigSource::AbsPath(c) => { |
|
248 | 248 | // TODO check if it should be trusted |
|
249 | 249 | // mercurial/ui.py:427 |
|
250 | 250 | let data = match std::fs::read(&c) { |
|
251 | 251 | Err(_) => continue, // same as the python code |
|
252 | 252 | Ok(data) => data, |
|
253 | 253 | }; |
|
254 | 254 | layers.extend(ConfigLayer::parse(&c, &data)?) |
|
255 | 255 | } |
|
256 | 256 | } |
|
257 | 257 | } |
|
258 | 258 | |
|
259 | 259 | Ok(Config { layers }) |
|
260 | 260 | } |
|
261 | 261 | |
|
262 | 262 | /// Loads the per-repository config into a new `Config` which is combined |
|
263 | 263 | /// with `self`. |
|
264 | 264 | pub(crate) fn combine_with_repo( |
|
265 | 265 | &self, |
|
266 | 266 | repo_config_files: &[PathBuf], |
|
267 | 267 | ) -> Result<Self, ConfigError> { |
|
268 | 268 | let (cli_layers, other_layers) = self |
|
269 | 269 | .layers |
|
270 | 270 | .iter() |
|
271 | 271 | .cloned() |
|
272 | 272 | .partition(ConfigLayer::is_from_command_line); |
|
273 | 273 | |
|
274 | 274 | let mut repo_config = Self { |
|
275 | 275 | layers: other_layers, |
|
276 | 276 | }; |
|
277 | 277 | for path in repo_config_files { |
|
278 | 278 | // TODO: check if this file should be trusted: |
|
279 | 279 | // `mercurial/ui.py:427` |
|
280 | 280 | repo_config.add_trusted_file(path)?; |
|
281 | 281 | } |
|
282 | 282 | repo_config.layers.extend(cli_layers); |
|
283 | 283 | Ok(repo_config) |
|
284 | 284 | } |
|
285 | 285 | |
|
286 | 286 | fn get_parse<'config, T: 'config>( |
|
287 | 287 | &'config self, |
|
288 | 288 | section: &[u8], |
|
289 | 289 | item: &[u8], |
|
290 | 290 | expected_type: &'static str, |
|
291 | 291 | parse: impl Fn(&'config [u8]) -> Option<T>, |
|
292 | 292 | ) -> Result<Option<T>, ConfigValueParseError> { |
|
293 | 293 | match self.get_inner(§ion, &item) { |
|
294 | 294 | Some((layer, v)) => match parse(&v.bytes) { |
|
295 | 295 | Some(b) => Ok(Some(b)), |
|
296 | 296 | None => Err(ConfigValueParseError { |
|
297 | 297 | origin: layer.origin.to_owned(), |
|
298 | 298 | line: v.line, |
|
299 | 299 | value: v.bytes.to_owned(), |
|
300 | 300 | section: section.to_owned(), |
|
301 | 301 | item: item.to_owned(), |
|
302 | 302 | expected_type, |
|
303 | 303 | }), |
|
304 | 304 | }, |
|
305 | 305 | None => Ok(None), |
|
306 | 306 | } |
|
307 | 307 | } |
|
308 | 308 | |
|
309 | 309 | /// Returns an `Err` if the first value found is not a valid UTF-8 string. |
|
310 | 310 | /// Otherwise, returns an `Ok(value)` if found, or `None`. |
|
311 | 311 | pub fn get_str( |
|
312 | 312 | &self, |
|
313 | 313 | section: &[u8], |
|
314 | 314 | item: &[u8], |
|
315 | 315 | ) -> Result<Option<&str>, ConfigValueParseError> { |
|
316 | 316 | self.get_parse(section, item, "ASCII or UTF-8 string", |value| { |
|
317 | 317 | str::from_utf8(value).ok() |
|
318 | 318 | }) |
|
319 | 319 | } |
|
320 | 320 | |
|
321 | 321 | /// Returns an `Err` if the first value found is not a valid unsigned |
|
322 | 322 | /// integer. Otherwise, returns an `Ok(value)` if found, or `None`. |
|
323 | 323 | pub fn get_u32( |
|
324 | 324 | &self, |
|
325 | 325 | section: &[u8], |
|
326 | 326 | item: &[u8], |
|
327 | 327 | ) -> Result<Option<u32>, ConfigValueParseError> { |
|
328 | 328 | self.get_parse(section, item, "valid integer", |value| { |
|
329 | 329 | str::from_utf8(value).ok()?.parse().ok() |
|
330 | 330 | }) |
|
331 | 331 | } |
|
332 | 332 | |
|
333 | 333 | /// Returns an `Err` if the first value found is not a valid file size |
|
334 | 334 | /// value such as `30` (default unit is bytes), `7 MB`, or `42.5 kb`. |
|
335 | 335 | /// Otherwise, returns an `Ok(value_in_bytes)` if found, or `None`. |
|
336 | 336 | pub fn get_byte_size( |
|
337 | 337 | &self, |
|
338 | 338 | section: &[u8], |
|
339 | 339 | item: &[u8], |
|
340 | 340 | ) -> Result<Option<u64>, ConfigValueParseError> { |
|
341 | 341 | self.get_parse(section, item, "byte quantity", values::parse_byte_size) |
|
342 | 342 | } |
|
343 | 343 | |
|
344 | 344 | /// Returns an `Err` if the first value found is not a valid boolean. |
|
345 | 345 | /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if |
|
346 | 346 | /// found, or `None`. |
|
347 | 347 | pub fn get_option( |
|
348 | 348 | &self, |
|
349 | 349 | section: &[u8], |
|
350 | 350 | item: &[u8], |
|
351 | 351 | ) -> Result<Option<bool>, ConfigValueParseError> { |
|
352 | 352 | self.get_parse(section, item, "boolean", values::parse_bool) |
|
353 | 353 | } |
|
354 | 354 | |
|
355 | 355 | /// Returns the corresponding boolean in the config. Returns `Ok(false)` |
|
356 | 356 | /// if the value is not found, an `Err` if it's not a valid boolean. |
|
357 | 357 | pub fn get_bool( |
|
358 | 358 | &self, |
|
359 | 359 | section: &[u8], |
|
360 | 360 | item: &[u8], |
|
361 | 361 | ) -> Result<bool, ConfigValueParseError> { |
|
362 | 362 | Ok(self.get_option(section, item)?.unwrap_or(false)) |
|
363 | 363 | } |
|
364 | 364 | |
|
365 | 365 | /// Returns the corresponding list-value in the config if found, or `None`. |
|
366 | 366 | /// |
|
367 | 367 | /// This is appropriate for new configuration keys. The value syntax is |
|
368 | 368 | /// **not** the same as most existing list-valued config, which has Python |
|
369 | 369 | /// parsing implemented in `parselist()` in |
|
370 | 370 | /// `mercurial/utils/stringutil.py`. Faithfully porting that parsing |
|
 371 | 371 | /// algorithm to Rust (including behaviors that are arguably bugs) |
|
372 | 372 | /// turned out to be non-trivial and hasn’t been completed as of this |
|
373 | 373 | /// writing. |
|
374 | 374 | /// |
|
375 | 375 | /// Instead, the "simple" syntax is: split on comma, then trim leading and |
|
376 | 376 | /// trailing whitespace of each component. Quotes or backslashes are not |
|
377 | 377 | /// interpreted in any way. Commas are mandatory between values. Values |
|
378 | 378 | /// that contain a comma are not supported. |
|
379 | 379 | pub fn get_simple_list( |
|
380 | 380 | &self, |
|
381 | 381 | section: &[u8], |
|
382 | 382 | item: &[u8], |
|
383 | 383 | ) -> Option<impl Iterator<Item = &[u8]>> { |
|
384 | 384 | self.get(section, item).map(|value| { |
|
385 | 385 | value |
|
386 | 386 | .split(|&byte| byte == b',') |
|
387 | 387 | .map(|component| component.trim()) |
|
388 | 388 | }) |
|
389 | 389 | } |
|
390 | 390 | |
|
391 | /// If there is an `item` value in `section`, parse and return a list of | |
|
392 | /// byte strings. | |
|
393 | pub fn get_list( | |
|
394 | &self, | |
|
395 | section: &[u8], | |
|
396 | item: &[u8], | |
|
397 | ) -> Option<Vec<Vec<u8>>> { | |
|
398 | self.get(section, item).map(values::parse_list) | |
|
399 | } | |
|
400 | ||
|
391 | 401 | /// Returns the raw value bytes of the first one found, or `None`. |
|
392 | 402 | pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> { |
|
393 | 403 | self.get_inner(section, item) |
|
394 | 404 | .map(|(_, value)| value.bytes.as_ref()) |
|
395 | 405 | } |
|
396 | 406 | |
|
397 | 407 | /// Returns the layer and the value of the first one found, or `None`. |
|
398 | 408 | fn get_inner( |
|
399 | 409 | &self, |
|
400 | 410 | section: &[u8], |
|
401 | 411 | item: &[u8], |
|
402 | 412 | ) -> Option<(&ConfigLayer, &ConfigValue)> { |
|
403 | 413 | for layer in self.layers.iter().rev() { |
|
404 | 414 | if !layer.trusted { |
|
405 | 415 | continue; |
|
406 | 416 | } |
|
407 | 417 | if let Some(v) = layer.get(§ion, &item) { |
|
408 | 418 | return Some((&layer, v)); |
|
409 | 419 | } |
|
410 | 420 | } |
|
411 | 421 | None |
|
412 | 422 | } |
|
413 | 423 | |
|
414 | 424 | /// Return all keys defined for the given section |
|
415 | 425 | pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> { |
|
416 | 426 | self.layers |
|
417 | 427 | .iter() |
|
418 | 428 | .flat_map(|layer| layer.iter_keys(section)) |
|
419 | 429 | .collect() |
|
420 | 430 | } |
|
421 | 431 | |
|
422 | 432 | /// Get raw values bytes from all layers (even untrusted ones) in order |
|
423 | 433 | /// of precedence. |
|
424 | 434 | #[cfg(test)] |
|
425 | 435 | fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> { |
|
426 | 436 | let mut res = vec![]; |
|
427 | 437 | for layer in self.layers.iter().rev() { |
|
428 | 438 | if let Some(v) = layer.get(§ion, &item) { |
|
429 | 439 | res.push(v.bytes.as_ref()); |
|
430 | 440 | } |
|
431 | 441 | } |
|
432 | 442 | res |
|
433 | 443 | } |
|
434 | 444 | } |
|
435 | 445 | |
|
436 | 446 | #[cfg(test)] |
|
437 | 447 | mod tests { |
|
438 | 448 | use super::*; |
|
439 | 449 | use pretty_assertions::assert_eq; |
|
440 | 450 | use std::fs::File; |
|
441 | 451 | use std::io::Write; |
|
442 | 452 | |
|
443 | 453 | #[test] |
|
444 | 454 | fn test_include_layer_ordering() { |
|
445 | 455 | let tmpdir = tempfile::tempdir().unwrap(); |
|
446 | 456 | let tmpdir_path = tmpdir.path(); |
|
447 | 457 | let mut included_file = |
|
448 | 458 | File::create(&tmpdir_path.join("included.rc")).unwrap(); |
|
449 | 459 | |
|
450 | 460 | included_file.write_all(b"[section]\nitem=value1").unwrap(); |
|
451 | 461 | let base_config_path = tmpdir_path.join("base.rc"); |
|
452 | 462 | let mut config_file = File::create(&base_config_path).unwrap(); |
|
453 | 463 | let data = |
|
454 | 464 | b"[section]\nitem=value0\n%include included.rc\nitem=value2\n\ |
|
455 | 465 | [section2]\ncount = 4\nsize = 1.5 KB\nnot-count = 1.5\nnot-size = 1 ub"; |
|
456 | 466 | config_file.write_all(data).unwrap(); |
|
457 | 467 | |
|
458 | 468 | let sources = vec![ConfigSource::AbsPath(base_config_path)]; |
|
459 | 469 | let config = Config::load_from_explicit_sources(sources) |
|
460 | 470 | .expect("expected valid config"); |
|
461 | 471 | |
|
462 | 472 | let (_, value) = config.get_inner(b"section", b"item").unwrap(); |
|
463 | 473 | assert_eq!( |
|
464 | 474 | value, |
|
465 | 475 | &ConfigValue { |
|
466 | 476 | bytes: b"value2".to_vec(), |
|
467 | 477 | line: Some(4) |
|
468 | 478 | } |
|
469 | 479 | ); |
|
470 | 480 | |
|
471 | 481 | let value = config.get(b"section", b"item").unwrap(); |
|
472 | 482 | assert_eq!(value, b"value2",); |
|
473 | 483 | assert_eq!( |
|
474 | 484 | config.get_all(b"section", b"item"), |
|
475 | 485 | [b"value2", b"value1", b"value0"] |
|
476 | 486 | ); |
|
477 | 487 | |
|
478 | 488 | assert_eq!(config.get_u32(b"section2", b"count").unwrap(), Some(4)); |
|
479 | 489 | assert_eq!( |
|
480 | 490 | config.get_byte_size(b"section2", b"size").unwrap(), |
|
481 | 491 | Some(1024 + 512) |
|
482 | 492 | ); |
|
483 | 493 | assert!(config.get_u32(b"section2", b"not-count").is_err()); |
|
484 | 494 | assert!(config.get_byte_size(b"section2", b"not-size").is_err()); |
|
485 | 495 | } |
|
486 | 496 | } |
@@ -1,61 +1,273 b'' | |||
|
 1 | 1 | //! Parsing functions for various types of configuration values. |
|
2 | 2 | //! |
|
3 | 3 | //! Returning `None` indicates a syntax error. Using a `Result` would be more |
|
4 | 4 | //! correct but would take more boilerplate for converting between error types, |
|
5 | 5 | //! compared to using `.ok()` on inner results of various error types to |
|
6 | 6 | //! convert them all to options. The `Config::get_parse` method later converts |
|
7 | 7 | //! those options to results with `ConfigValueParseError`, which contains |
|
8 | 8 | //! details about where the value came from (but omits details of what’s |
|
9 | 9 | //! invalid inside the value). |
|
10 | 10 | |
|
11 | use crate::utils::SliceExt; | |
|
12 | ||
|
11 | 13 | pub(super) fn parse_bool(v: &[u8]) -> Option<bool> { |
|
12 | 14 | match v.to_ascii_lowercase().as_slice() { |
|
13 | 15 | b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true), |
|
14 | 16 | b"0" | b"no" | b"false" | b"off" | b"never" => Some(false), |
|
15 | 17 | _ => None, |
|
16 | 18 | } |
|
17 | 19 | } |
|
18 | 20 | |
|
19 | 21 | pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> { |
|
20 | 22 | let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase(); |
|
21 | 23 | const UNITS: &[(&str, u64)] = &[ |
|
22 | 24 | ("g", 1 << 30), |
|
23 | 25 | ("gb", 1 << 30), |
|
24 | 26 | ("m", 1 << 20), |
|
25 | 27 | ("mb", 1 << 20), |
|
26 | 28 | ("k", 1 << 10), |
|
27 | 29 | ("kb", 1 << 10), |
|
28 | 30 | ("b", 1 << 0), // Needs to be last |
|
29 | 31 | ]; |
|
30 | 32 | for &(unit, multiplier) in UNITS { |
|
31 | 33 | // TODO: use `value.strip_suffix(unit)` when we require Rust 1.45+ |
|
32 | 34 | if value.ends_with(unit) { |
|
33 | 35 | let value_before_unit = &value[..value.len() - unit.len()]; |
|
34 | 36 | let float: f64 = value_before_unit.trim().parse().ok()?; |
|
35 | 37 | if float >= 0.0 { |
|
36 | 38 | return Some((float * multiplier as f64).round() as u64); |
|
37 | 39 | } else { |
|
38 | 40 | return None; |
|
39 | 41 | } |
|
40 | 42 | } |
|
41 | 43 | } |
|
42 | 44 | value.parse().ok() |
|
43 | 45 | } |
|
44 | 46 | |
|
47 | /// Parse a config value as a list of sub-values. | |
|
48 | /// | |
|
49 | /// Ported from `parselist` in `mercurial/utils/stringutil.py` | |
|
50 | ||
|
51 | // Note: keep behavior in sync with the Python one. | |
|
52 | ||
|
53 | // Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when | |
|
54 | // possible (when there’s no backslash-escapes) but this is probably not worth | |
|
55 | // the complexity as config is presumably not accessed inside | |
|
56 | // preformance-sensitive loops. | |
|
57 | pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> { | |
|
58 | // Port of Python’s `value.lstrip(b' ,\n')` | |
|
59 | // TODO: is this really what we want? | |
|
60 | let input = | |
|
61 | input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n'); | |
|
62 | parse_list_without_trim_start(input) | |
|
63 | } | |
|
64 | ||
|
65 | fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> { | |
|
66 | // Start of port of Python’s `_configlist` | |
|
67 | let input = input.trim_end_matches(|b| b == b' ' || b == b','); | |
|
68 | if input.is_empty() { | |
|
69 | return Vec::new(); | |
|
70 | } | |
|
71 | ||
|
72 | // Just to make “a string” less confusable with “a list of strings”. | |
|
73 | type ByteString = Vec<u8>; | |
|
74 | ||
|
75 | // These correspond to Python’s… | |
|
76 | let mut mode = ParserMode::Plain; // `parser` | |
|
77 | let mut values = Vec::new(); // `parts[:-1]` | |
|
78 | let mut next_value = ByteString::new(); // `parts[-1]` | |
|
79 | let mut offset = 0; // `offset` | |
|
80 | ||
|
81 | // Setting `parser` to `None` is instead handled by returning immediately | |
|
82 | enum ParserMode { | |
|
83 | Plain, | |
|
84 | Quoted, | |
|
85 | } | |
|
86 | ||
|
87 | loop { | |
|
88 | match mode { | |
|
89 | ParserMode::Plain => { | |
|
90 | // Start of port of Python’s `_parse_plain` | |
|
91 | let mut whitespace = false; | |
|
92 | while let Some(&byte) = input.get(offset) { | |
|
93 | if is_space(byte) || byte == b',' { | |
|
94 | whitespace = true; | |
|
95 | offset += 1; | |
|
96 | } else { | |
|
97 | break; | |
|
98 | } | |
|
99 | } | |
|
100 | if let Some(&byte) = input.get(offset) { | |
|
101 | if whitespace { | |
|
102 | values.push(std::mem::take(&mut next_value)) | |
|
103 | } | |
|
104 | if byte == b'"' && next_value.is_empty() { | |
|
105 | mode = ParserMode::Quoted; | |
|
106 | } else { | |
|
107 | if byte == b'"' && next_value.ends_with(b"\\") { | |
|
108 | next_value.pop(); | |
|
109 | } | |
|
110 | next_value.push(byte); | |
|
111 | } | |
|
112 | offset += 1; | |
|
113 | } else { | |
|
114 | values.push(next_value); | |
|
115 | return values; | |
|
116 | } | |
|
117 | } | |
|
118 | ParserMode::Quoted => { | |
|
119 | // Start of port of Python’s `_parse_quote` | |
|
120 | if let Some(&byte) = input.get(offset) { | |
|
121 | if byte == b'"' { | |
|
122 | // The input contains a quoted zero-length value `""` | |
|
123 | debug_assert_eq!(next_value, b""); | |
|
124 | values.push(std::mem::take(&mut next_value)); | |
|
125 | offset += 1; | |
|
126 | while let Some(&byte) = input.get(offset) { | |
|
127 | if is_space(byte) || byte == b',' { | |
|
128 | offset += 1; | |
|
129 | } else { | |
|
130 | break; | |
|
131 | } | |
|
132 | } | |
|
133 | mode = ParserMode::Plain; | |
|
134 | continue; | |
|
135 | } | |
|
136 | } | |
|
137 | ||
|
138 | while let Some(&byte) = input.get(offset) { | |
|
139 | if byte == b'"' { | |
|
140 | break; | |
|
141 | } | |
|
142 | if byte == b'\\' && input.get(offset + 1) == Some(&b'"') { | |
|
143 | next_value.push(b'"'); | |
|
144 | offset += 2; | |
|
145 | } else { | |
|
146 | next_value.push(byte); | |
|
147 | offset += 1; | |
|
148 | } | |
|
149 | } | |
|
150 | ||
|
151 | if offset >= input.len() { | |
|
152 | // We didn’t find a closing double-quote, | |
|
153 | // so treat the opening one as part of an unquoted value | |
|
154 | // instead of delimiting the start of a quoted value. | |
|
155 | ||
|
156 | // `next_value` may have had some backslash-escapes | |
|
157 | // unescaped. TODO: shouldn’t we use a slice of `input` | |
|
158 | // instead? | |
|
159 | let mut real_values = | |
|
160 | parse_list_without_trim_start(&next_value); | |
|
161 | ||
|
162 | if let Some(first) = real_values.first_mut() { | |
|
163 | first.insert(0, b'"'); | |
|
164 | // Drop `next_value` | |
|
165 | values.extend(real_values) | |
|
166 | } else { | |
|
167 | next_value.push(b'"'); | |
|
168 | values.push(next_value); | |
|
169 | } | |
|
170 | return values; | |
|
171 | } | |
|
172 | ||
|
173 | // We’re not at the end of the input, which means the `while` | |
|
174 | // loop above ended at at double quote. Skip | |
|
175 | // over that. | |
|
176 | offset += 1; | |
|
177 | ||
|
178 | while let Some(&byte) = input.get(offset) { | |
|
179 | if byte == b' ' || byte == b',' { | |
|
180 | offset += 1; | |
|
181 | } else { | |
|
182 | break; | |
|
183 | } | |
|
184 | } | |
|
185 | ||
|
186 | if offset >= input.len() { | |
|
187 | values.push(next_value); | |
|
188 | return values; | |
|
189 | } | |
|
190 | ||
|
191 | if offset + 1 == input.len() && input[offset] == b'"' { | |
|
192 | next_value.push(b'"'); | |
|
193 | offset += 1; | |
|
194 | } else { | |
|
195 | values.push(std::mem::take(&mut next_value)); | |
|
196 | } | |
|
197 | ||
|
198 | mode = ParserMode::Plain; | |
|
199 | } | |
|
200 | } | |
|
201 | } | |
|
202 | ||
|
203 | // https://docs.python.org/3/library/stdtypes.html?#bytes.isspace | |
|
204 | fn is_space(byte: u8) -> bool { | |
|
205 | if let b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c' = byte { | |
|
206 | true | |
|
207 | } else { | |
|
208 | false | |
|
209 | } | |
|
210 | } | |
|
211 | } | |
|
212 | ||
|
213 | #[test] | |
|
214 | fn test_parse_list() { | |
|
215 | // Make `assert_eq` error messages nicer | |
|
216 | fn as_strings(values: &[Vec<u8>]) -> Vec<String> { | |
|
217 | values | |
|
218 | .iter() | |
|
219 | .map(|v| std::str::from_utf8(v.as_ref()).unwrap().to_owned()) | |
|
220 | .collect() | |
|
221 | } | |
|
222 | macro_rules! assert_parse_list { | |
|
223 | ( $input: expr => [ $( $output: expr ),* ] ) => { | |
|
224 | assert_eq!( | |
|
225 | as_strings(&parse_list($input)), | |
|
226 | as_strings(&[ $( Vec::from(&$output[..]) ),* ]), | |
|
227 | ); | |
|
228 | } | |
|
229 | } | |
|
230 | ||
|
231 | // Keep these Rust tests in sync with the Python ones in | |
|
232 | // `tests/test-config-parselist.py` | |
|
233 | assert_parse_list!(b"" => []); | |
|
234 | assert_parse_list!(b"," => []); | |
|
235 | assert_parse_list!(b"A" => [b"A"]); | |
|
236 | assert_parse_list!(b"B,B" => [b"B", b"B"]); | |
|
237 | assert_parse_list!(b", C, ,C," => [b"C", b"C"]); | |
|
238 | assert_parse_list!(b"\"" => [b"\""]); | |
|
239 | assert_parse_list!(b"\"\"" => [b"", b""]); | |
|
240 | assert_parse_list!(b"D,\"" => [b"D", b"\""]); | |
|
241 | assert_parse_list!(b"E,\"\"" => [b"E", b"", b""]); | |
|
242 | assert_parse_list!(b"\"F,F\"" => [b"F,F"]); | |
|
243 | assert_parse_list!(b"\"G,G" => [b"\"G", b"G"]); | |
|
244 | assert_parse_list!(b"\"H \\\",\\\"H" => [b"\"H", b",", b"H"]); | |
|
245 | assert_parse_list!(b"I,I\"" => [b"I", b"I\""]); | |
|
246 | assert_parse_list!(b"J,\"J" => [b"J", b"\"J"]); | |
|
247 | assert_parse_list!(b"K K" => [b"K", b"K"]); | |
|
248 | assert_parse_list!(b"\"K\" K" => [b"K", b"K"]); | |
|
249 | assert_parse_list!(b"L\tL" => [b"L", b"L"]); | |
|
250 | assert_parse_list!(b"\"L\"\tL" => [b"L", b"", b"L"]); | |
|
251 | assert_parse_list!(b"M\x0bM" => [b"M", b"M"]); | |
|
252 | assert_parse_list!(b"\"M\"\x0bM" => [b"M", b"", b"M"]); | |
|
253 | assert_parse_list!(b"\"N\" , ,\"" => [b"N\""]); | |
|
254 | assert_parse_list!(b"\" ,O, " => [b"\"", b"O"]); | |
|
255 | } | |
|
256 | ||
|
45 | 257 | #[test] |
|
46 | 258 | fn test_parse_byte_size() { |
|
47 | 259 | assert_eq!(parse_byte_size(b""), None); |
|
48 | 260 | assert_eq!(parse_byte_size(b"b"), None); |
|
49 | 261 | |
|
50 | 262 | assert_eq!(parse_byte_size(b"12"), Some(12)); |
|
51 | 263 | assert_eq!(parse_byte_size(b"12b"), Some(12)); |
|
52 | 264 | assert_eq!(parse_byte_size(b"12 b"), Some(12)); |
|
53 | 265 | assert_eq!(parse_byte_size(b"12.1 b"), Some(12)); |
|
54 | 266 | assert_eq!(parse_byte_size(b"1.1 K"), Some(1126)); |
|
55 | 267 | assert_eq!(parse_byte_size(b"1.1 kB"), Some(1126)); |
|
56 | 268 | |
|
57 | 269 | assert_eq!(parse_byte_size(b"-12 b"), None); |
|
58 | 270 | assert_eq!(parse_byte_size(b"-0.1 b"), None); |
|
59 | 271 | assert_eq!(parse_byte_size(b"0.1 b"), Some(0)); |
|
60 | 272 | assert_eq!(parse_byte_size(b"12.1 b"), Some(12)); |
|
61 | 273 | } |
General Comments 0
You need to be logged in to leave comments.
Login now