##// END OF EJS Templates
rhg: Port Python’s `ui.configlist` as `Config::get_list`...
Simon Sapin -
r48762:6961eca0 default
parent child Browse files
Show More
@@ -0,0 +1,52 b''
1 """
2 List-valued configuration keys have an ad-hoc microsyntax. From `hg help config`:
3
4 > List values are separated by whitespace or comma, except when values are
5 > placed in double quotation marks:
6 >
7 > allow_read = "John Doe, PhD", brian, betty
8 >
9 > Quotation marks can be escaped by prefixing them with a backslash. Only
10 > quotation marks at the beginning of a word is counted as a quotation
11 > (e.g., ``foo"bar baz`` is the list of ``foo"bar`` and ``baz``).
12
13 That help documentation is fairly light on details; the actual parser has many
14 other edge cases. This test tries to cover them.
15 """
16
17 from mercurial.utils import stringutil
18
19
def assert_parselist(input, expected):
    """Check that ``stringutil.parselist(input)`` returns ``expected``.

    Raises an ``AssertionError`` that shows the input, the actual result and
    the expected result when the two differ; returns ``None`` otherwise.
    """
    actual = stringutil.parselist(input)
    if actual == expected:
        return
    raise AssertionError(
        "parse_input(%r)\n got %r\nexpected %r" % (input, actual, expected)
    )
27
28
29 # Keep these Python tests in sync with the Rust ones in `rust/hg-core/src/config/values.rs`
30
# Each entry is (raw config value, list expected from `stringutil.parselist`).
# The cases are checked in order; the first mismatch raises AssertionError.
PARSELIST_CASES = [
    (b'', []),
    (b',', []),
    (b'A', [b'A']),
    (b'B,B', [b'B', b'B']),
    (b', C, ,C,', [b'C', b'C']),
    (b'"', [b'"']),
    (b'""', [b'', b'']),
    (b'D,"', [b'D', b'"']),
    (b'E,""', [b'E', b'', b'']),
    (b'"F,F"', [b'F,F']),
    (b'"G,G', [b'"G', b'G']),
    (b'"H \\",\\"H', [b'"H', b',', b'H']),
    (b'I,I"', [b'I', b'I"']),
    (b'J,"J', [b'J', b'"J']),
    (b'K K', [b'K', b'K']),
    (b'"K" K', [b'K', b'K']),
    (b'L\tL', [b'L', b'L']),
    (b'"L"\tL', [b'L', b'', b'L']),
    (b'M\x0bM', [b'M', b'M']),
    (b'"M"\x0bM', [b'M', b'', b'M']),
    (b'"N" , ,"', [b'N"']),
    (b'" ,O, ', [b'"', b'O']),
]

for raw_value, wanted in PARSELIST_CASES:
    assert_parselist(raw_value, wanted)
@@ -1,486 +1,496 b''
1 // config.rs
1 // config.rs
2 //
2 //
3 // Copyright 2020
3 // Copyright 2020
4 // Valentin Gatien-Baron,
4 // Valentin Gatien-Baron,
5 // Raphaël Gomès <rgomes@octobus.net>
5 // Raphaël Gomès <rgomes@octobus.net>
6 //
6 //
7 // This software may be used and distributed according to the terms of the
7 // This software may be used and distributed according to the terms of the
8 // GNU General Public License version 2 or any later version.
8 // GNU General Public License version 2 or any later version.
9
9
10 use super::layer;
10 use super::layer;
11 use super::values;
11 use super::values;
12 use crate::config::layer::{
12 use crate::config::layer::{
13 ConfigError, ConfigLayer, ConfigOrigin, ConfigValue,
13 ConfigError, ConfigLayer, ConfigOrigin, ConfigValue,
14 };
14 };
15 use crate::utils::files::get_bytes_from_os_str;
15 use crate::utils::files::get_bytes_from_os_str;
16 use crate::utils::SliceExt;
16 use crate::utils::SliceExt;
17 use format_bytes::{write_bytes, DisplayBytes};
17 use format_bytes::{write_bytes, DisplayBytes};
18 use std::collections::HashSet;
18 use std::collections::HashSet;
19 use std::env;
19 use std::env;
20 use std::fmt;
20 use std::fmt;
21 use std::path::{Path, PathBuf};
21 use std::path::{Path, PathBuf};
22 use std::str;
22 use std::str;
23
23
24 use crate::errors::{HgResultExt, IoResultExt};
24 use crate::errors::{HgResultExt, IoResultExt};
25
25
26 /// Holds the config values for the current repository
26 /// Holds the config values for the current repository
27 /// TODO update this docstring once we support more sources
27 /// TODO update this docstring once we support more sources
28 #[derive(Clone)]
28 #[derive(Clone)]
29 pub struct Config {
29 pub struct Config {
30 layers: Vec<layer::ConfigLayer>,
30 layers: Vec<layer::ConfigLayer>,
31 }
31 }
32
32
33 impl DisplayBytes for Config {
33 impl DisplayBytes for Config {
34 fn display_bytes(
34 fn display_bytes(
35 &self,
35 &self,
36 out: &mut dyn std::io::Write,
36 out: &mut dyn std::io::Write,
37 ) -> std::io::Result<()> {
37 ) -> std::io::Result<()> {
38 for (index, layer) in self.layers.iter().rev().enumerate() {
38 for (index, layer) in self.layers.iter().rev().enumerate() {
39 write_bytes!(
39 write_bytes!(
40 out,
40 out,
41 b"==== Layer {} (trusted: {}) ====\n{}",
41 b"==== Layer {} (trusted: {}) ====\n{}",
42 index,
42 index,
43 if layer.trusted {
43 if layer.trusted {
44 &b"yes"[..]
44 &b"yes"[..]
45 } else {
45 } else {
46 &b"no"[..]
46 &b"no"[..]
47 },
47 },
48 layer
48 layer
49 )?;
49 )?;
50 }
50 }
51 Ok(())
51 Ok(())
52 }
52 }
53 }
53 }
54
54
55 pub enum ConfigSource {
55 pub enum ConfigSource {
56 /// Absolute path to a config file
56 /// Absolute path to a config file
57 AbsPath(PathBuf),
57 AbsPath(PathBuf),
58 /// Already parsed (from the CLI, env, Python resources, etc.)
58 /// Already parsed (from the CLI, env, Python resources, etc.)
59 Parsed(layer::ConfigLayer),
59 Parsed(layer::ConfigLayer),
60 }
60 }
61
61
62 #[derive(Debug)]
62 #[derive(Debug)]
63 pub struct ConfigValueParseError {
63 pub struct ConfigValueParseError {
64 pub origin: ConfigOrigin,
64 pub origin: ConfigOrigin,
65 pub line: Option<usize>,
65 pub line: Option<usize>,
66 pub section: Vec<u8>,
66 pub section: Vec<u8>,
67 pub item: Vec<u8>,
67 pub item: Vec<u8>,
68 pub value: Vec<u8>,
68 pub value: Vec<u8>,
69 pub expected_type: &'static str,
69 pub expected_type: &'static str,
70 }
70 }
71
71
72 impl fmt::Display for ConfigValueParseError {
72 impl fmt::Display for ConfigValueParseError {
73 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
73 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74 // TODO: add origin and line number information, here and in
74 // TODO: add origin and line number information, here and in
75 // corresponding python code
75 // corresponding python code
76 write!(
76 write!(
77 f,
77 f,
78 "config error: {}.{} is not a {} ('{}')",
78 "config error: {}.{} is not a {} ('{}')",
79 String::from_utf8_lossy(&self.section),
79 String::from_utf8_lossy(&self.section),
80 String::from_utf8_lossy(&self.item),
80 String::from_utf8_lossy(&self.item),
81 self.expected_type,
81 self.expected_type,
82 String::from_utf8_lossy(&self.value)
82 String::from_utf8_lossy(&self.value)
83 )
83 )
84 }
84 }
85 }
85 }
86
86
87 impl Config {
87 impl Config {
88 /// Load system and user configuration from various files.
88 /// Load system and user configuration from various files.
89 ///
89 ///
90 /// This is also affected by some environment variables.
90 /// This is also affected by some environment variables.
91 pub fn load_non_repo() -> Result<Self, ConfigError> {
91 pub fn load_non_repo() -> Result<Self, ConfigError> {
92 let mut config = Self { layers: Vec::new() };
92 let mut config = Self { layers: Vec::new() };
93 let opt_rc_path = env::var_os("HGRCPATH");
93 let opt_rc_path = env::var_os("HGRCPATH");
94 // HGRCPATH replaces system config
94 // HGRCPATH replaces system config
95 if opt_rc_path.is_none() {
95 if opt_rc_path.is_none() {
96 config.add_system_config()?
96 config.add_system_config()?
97 }
97 }
98
98
99 config.add_for_environment_variable("EDITOR", b"ui", b"editor");
99 config.add_for_environment_variable("EDITOR", b"ui", b"editor");
100 config.add_for_environment_variable("VISUAL", b"ui", b"editor");
100 config.add_for_environment_variable("VISUAL", b"ui", b"editor");
101 config.add_for_environment_variable("PAGER", b"pager", b"pager");
101 config.add_for_environment_variable("PAGER", b"pager", b"pager");
102
102
103 // These are set by `run-tests.py --rhg` to enable fallback for the
103 // These are set by `run-tests.py --rhg` to enable fallback for the
104 // entire test suite. Alternatives would be setting configuration
104 // entire test suite. Alternatives would be setting configuration
105 // through `$HGRCPATH` but some tests override that, or changing the
105 // through `$HGRCPATH` but some tests override that, or changing the
106 // `hg` shell alias to include `--config` but that disrupts tests that
106 // `hg` shell alias to include `--config` but that disrupts tests that
107 // print command lines and check expected output.
107 // print command lines and check expected output.
108 config.add_for_environment_variable(
108 config.add_for_environment_variable(
109 "RHG_ON_UNSUPPORTED",
109 "RHG_ON_UNSUPPORTED",
110 b"rhg",
110 b"rhg",
111 b"on-unsupported",
111 b"on-unsupported",
112 );
112 );
113 config.add_for_environment_variable(
113 config.add_for_environment_variable(
114 "RHG_FALLBACK_EXECUTABLE",
114 "RHG_FALLBACK_EXECUTABLE",
115 b"rhg",
115 b"rhg",
116 b"fallback-executable",
116 b"fallback-executable",
117 );
117 );
118
118
119 // HGRCPATH replaces user config
119 // HGRCPATH replaces user config
120 if opt_rc_path.is_none() {
120 if opt_rc_path.is_none() {
121 config.add_user_config()?
121 config.add_user_config()?
122 }
122 }
123 if let Some(rc_path) = &opt_rc_path {
123 if let Some(rc_path) = &opt_rc_path {
124 for path in env::split_paths(rc_path) {
124 for path in env::split_paths(rc_path) {
125 if !path.as_os_str().is_empty() {
125 if !path.as_os_str().is_empty() {
126 if path.is_dir() {
126 if path.is_dir() {
127 config.add_trusted_dir(&path)?
127 config.add_trusted_dir(&path)?
128 } else {
128 } else {
129 config.add_trusted_file(&path)?
129 config.add_trusted_file(&path)?
130 }
130 }
131 }
131 }
132 }
132 }
133 }
133 }
134 Ok(config)
134 Ok(config)
135 }
135 }
136
136
137 pub fn load_cli_args_config(
137 pub fn load_cli_args_config(
138 &mut self,
138 &mut self,
139 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
139 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
140 ) -> Result<(), ConfigError> {
140 ) -> Result<(), ConfigError> {
141 if let Some(layer) = ConfigLayer::parse_cli_args(cli_config_args)? {
141 if let Some(layer) = ConfigLayer::parse_cli_args(cli_config_args)? {
142 self.layers.push(layer)
142 self.layers.push(layer)
143 }
143 }
144 Ok(())
144 Ok(())
145 }
145 }
146
146
147 fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> {
147 fn add_trusted_dir(&mut self, path: &Path) -> Result<(), ConfigError> {
148 if let Some(entries) = std::fs::read_dir(path)
148 if let Some(entries) = std::fs::read_dir(path)
149 .when_reading_file(path)
149 .when_reading_file(path)
150 .io_not_found_as_none()?
150 .io_not_found_as_none()?
151 {
151 {
152 let mut file_paths = entries
152 let mut file_paths = entries
153 .map(|result| {
153 .map(|result| {
154 result.when_reading_file(path).map(|entry| entry.path())
154 result.when_reading_file(path).map(|entry| entry.path())
155 })
155 })
156 .collect::<Result<Vec<_>, _>>()?;
156 .collect::<Result<Vec<_>, _>>()?;
157 file_paths.sort();
157 file_paths.sort();
158 for file_path in &file_paths {
158 for file_path in &file_paths {
159 if file_path.extension() == Some(std::ffi::OsStr::new("rc")) {
159 if file_path.extension() == Some(std::ffi::OsStr::new("rc")) {
160 self.add_trusted_file(&file_path)?
160 self.add_trusted_file(&file_path)?
161 }
161 }
162 }
162 }
163 }
163 }
164 Ok(())
164 Ok(())
165 }
165 }
166
166
167 fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> {
167 fn add_trusted_file(&mut self, path: &Path) -> Result<(), ConfigError> {
168 if let Some(data) = std::fs::read(path)
168 if let Some(data) = std::fs::read(path)
169 .when_reading_file(path)
169 .when_reading_file(path)
170 .io_not_found_as_none()?
170 .io_not_found_as_none()?
171 {
171 {
172 self.layers.extend(ConfigLayer::parse(path, &data)?)
172 self.layers.extend(ConfigLayer::parse(path, &data)?)
173 }
173 }
174 Ok(())
174 Ok(())
175 }
175 }
176
176
177 fn add_for_environment_variable(
177 fn add_for_environment_variable(
178 &mut self,
178 &mut self,
179 var: &str,
179 var: &str,
180 section: &[u8],
180 section: &[u8],
181 key: &[u8],
181 key: &[u8],
182 ) {
182 ) {
183 if let Some(value) = env::var_os(var) {
183 if let Some(value) = env::var_os(var) {
184 let origin = layer::ConfigOrigin::Environment(var.into());
184 let origin = layer::ConfigOrigin::Environment(var.into());
185 let mut layer = ConfigLayer::new(origin);
185 let mut layer = ConfigLayer::new(origin);
186 layer.add(
186 layer.add(
187 section.to_owned(),
187 section.to_owned(),
188 key.to_owned(),
188 key.to_owned(),
189 get_bytes_from_os_str(value),
189 get_bytes_from_os_str(value),
190 None,
190 None,
191 );
191 );
192 self.layers.push(layer)
192 self.layers.push(layer)
193 }
193 }
194 }
194 }
195
195
196 #[cfg(unix)] // TODO: other platforms
196 #[cfg(unix)] // TODO: other platforms
197 fn add_system_config(&mut self) -> Result<(), ConfigError> {
197 fn add_system_config(&mut self) -> Result<(), ConfigError> {
198 let mut add_for_prefix = |prefix: &Path| -> Result<(), ConfigError> {
198 let mut add_for_prefix = |prefix: &Path| -> Result<(), ConfigError> {
199 let etc = prefix.join("etc").join("mercurial");
199 let etc = prefix.join("etc").join("mercurial");
200 self.add_trusted_file(&etc.join("hgrc"))?;
200 self.add_trusted_file(&etc.join("hgrc"))?;
201 self.add_trusted_dir(&etc.join("hgrc.d"))
201 self.add_trusted_dir(&etc.join("hgrc.d"))
202 };
202 };
203 let root = Path::new("/");
203 let root = Path::new("/");
204 // TODO: use `std::env::args_os().next().unwrap()` a.k.a. argv[0]
204 // TODO: use `std::env::args_os().next().unwrap()` a.k.a. argv[0]
205 // instead? TODO: can this be a relative path?
205 // instead? TODO: can this be a relative path?
206 let hg = crate::utils::current_exe()?;
206 let hg = crate::utils::current_exe()?;
207 // TODO: this order (per-installation then per-system) matches
207 // TODO: this order (per-installation then per-system) matches
208 // `systemrcpath()` in `mercurial/scmposix.py`, but
208 // `systemrcpath()` in `mercurial/scmposix.py`, but
209 // `mercurial/helptext/config.txt` suggests it should be reversed
209 // `mercurial/helptext/config.txt` suggests it should be reversed
210 if let Some(installation_prefix) = hg.parent().and_then(Path::parent) {
210 if let Some(installation_prefix) = hg.parent().and_then(Path::parent) {
211 if installation_prefix != root {
211 if installation_prefix != root {
212 add_for_prefix(&installation_prefix)?
212 add_for_prefix(&installation_prefix)?
213 }
213 }
214 }
214 }
215 add_for_prefix(root)?;
215 add_for_prefix(root)?;
216 Ok(())
216 Ok(())
217 }
217 }
218
218
219 #[cfg(unix)] // TODO: other platforms
219 #[cfg(unix)] // TODO: other platforms
220 fn add_user_config(&mut self) -> Result<(), ConfigError> {
220 fn add_user_config(&mut self) -> Result<(), ConfigError> {
221 let opt_home = home::home_dir();
221 let opt_home = home::home_dir();
222 if let Some(home) = &opt_home {
222 if let Some(home) = &opt_home {
223 self.add_trusted_file(&home.join(".hgrc"))?
223 self.add_trusted_file(&home.join(".hgrc"))?
224 }
224 }
225 let darwin = cfg!(any(target_os = "macos", target_os = "ios"));
225 let darwin = cfg!(any(target_os = "macos", target_os = "ios"));
226 if !darwin {
226 if !darwin {
227 if let Some(config_home) = env::var_os("XDG_CONFIG_HOME")
227 if let Some(config_home) = env::var_os("XDG_CONFIG_HOME")
228 .map(PathBuf::from)
228 .map(PathBuf::from)
229 .or_else(|| opt_home.map(|home| home.join(".config")))
229 .or_else(|| opt_home.map(|home| home.join(".config")))
230 {
230 {
231 self.add_trusted_file(&config_home.join("hg").join("hgrc"))?
231 self.add_trusted_file(&config_home.join("hg").join("hgrc"))?
232 }
232 }
233 }
233 }
234 Ok(())
234 Ok(())
235 }
235 }
236
236
237 /// Loads in order, which means that the precedence is the same
237 /// Loads in order, which means that the precedence is the same
238 /// as the order of `sources`.
238 /// as the order of `sources`.
239 pub fn load_from_explicit_sources(
239 pub fn load_from_explicit_sources(
240 sources: Vec<ConfigSource>,
240 sources: Vec<ConfigSource>,
241 ) -> Result<Self, ConfigError> {
241 ) -> Result<Self, ConfigError> {
242 let mut layers = vec![];
242 let mut layers = vec![];
243
243
244 for source in sources.into_iter() {
244 for source in sources.into_iter() {
245 match source {
245 match source {
246 ConfigSource::Parsed(c) => layers.push(c),
246 ConfigSource::Parsed(c) => layers.push(c),
247 ConfigSource::AbsPath(c) => {
247 ConfigSource::AbsPath(c) => {
248 // TODO check if it should be trusted
248 // TODO check if it should be trusted
249 // mercurial/ui.py:427
249 // mercurial/ui.py:427
250 let data = match std::fs::read(&c) {
250 let data = match std::fs::read(&c) {
251 Err(_) => continue, // same as the python code
251 Err(_) => continue, // same as the python code
252 Ok(data) => data,
252 Ok(data) => data,
253 };
253 };
254 layers.extend(ConfigLayer::parse(&c, &data)?)
254 layers.extend(ConfigLayer::parse(&c, &data)?)
255 }
255 }
256 }
256 }
257 }
257 }
258
258
259 Ok(Config { layers })
259 Ok(Config { layers })
260 }
260 }
261
261
262 /// Loads the per-repository config into a new `Config` which is combined
262 /// Loads the per-repository config into a new `Config` which is combined
263 /// with `self`.
263 /// with `self`.
264 pub(crate) fn combine_with_repo(
264 pub(crate) fn combine_with_repo(
265 &self,
265 &self,
266 repo_config_files: &[PathBuf],
266 repo_config_files: &[PathBuf],
267 ) -> Result<Self, ConfigError> {
267 ) -> Result<Self, ConfigError> {
268 let (cli_layers, other_layers) = self
268 let (cli_layers, other_layers) = self
269 .layers
269 .layers
270 .iter()
270 .iter()
271 .cloned()
271 .cloned()
272 .partition(ConfigLayer::is_from_command_line);
272 .partition(ConfigLayer::is_from_command_line);
273
273
274 let mut repo_config = Self {
274 let mut repo_config = Self {
275 layers: other_layers,
275 layers: other_layers,
276 };
276 };
277 for path in repo_config_files {
277 for path in repo_config_files {
278 // TODO: check if this file should be trusted:
278 // TODO: check if this file should be trusted:
279 // `mercurial/ui.py:427`
279 // `mercurial/ui.py:427`
280 repo_config.add_trusted_file(path)?;
280 repo_config.add_trusted_file(path)?;
281 }
281 }
282 repo_config.layers.extend(cli_layers);
282 repo_config.layers.extend(cli_layers);
283 Ok(repo_config)
283 Ok(repo_config)
284 }
284 }
285
285
286 fn get_parse<'config, T: 'config>(
286 fn get_parse<'config, T: 'config>(
287 &'config self,
287 &'config self,
288 section: &[u8],
288 section: &[u8],
289 item: &[u8],
289 item: &[u8],
290 expected_type: &'static str,
290 expected_type: &'static str,
291 parse: impl Fn(&'config [u8]) -> Option<T>,
291 parse: impl Fn(&'config [u8]) -> Option<T>,
292 ) -> Result<Option<T>, ConfigValueParseError> {
292 ) -> Result<Option<T>, ConfigValueParseError> {
293 match self.get_inner(&section, &item) {
293 match self.get_inner(&section, &item) {
294 Some((layer, v)) => match parse(&v.bytes) {
294 Some((layer, v)) => match parse(&v.bytes) {
295 Some(b) => Ok(Some(b)),
295 Some(b) => Ok(Some(b)),
296 None => Err(ConfigValueParseError {
296 None => Err(ConfigValueParseError {
297 origin: layer.origin.to_owned(),
297 origin: layer.origin.to_owned(),
298 line: v.line,
298 line: v.line,
299 value: v.bytes.to_owned(),
299 value: v.bytes.to_owned(),
300 section: section.to_owned(),
300 section: section.to_owned(),
301 item: item.to_owned(),
301 item: item.to_owned(),
302 expected_type,
302 expected_type,
303 }),
303 }),
304 },
304 },
305 None => Ok(None),
305 None => Ok(None),
306 }
306 }
307 }
307 }
308
308
309 /// Returns an `Err` if the first value found is not a valid UTF-8 string.
309 /// Returns an `Err` if the first value found is not a valid UTF-8 string.
310 /// Otherwise, returns an `Ok(value)` if found, or `None`.
310 /// Otherwise, returns an `Ok(value)` if found, or `None`.
311 pub fn get_str(
311 pub fn get_str(
312 &self,
312 &self,
313 section: &[u8],
313 section: &[u8],
314 item: &[u8],
314 item: &[u8],
315 ) -> Result<Option<&str>, ConfigValueParseError> {
315 ) -> Result<Option<&str>, ConfigValueParseError> {
316 self.get_parse(section, item, "ASCII or UTF-8 string", |value| {
316 self.get_parse(section, item, "ASCII or UTF-8 string", |value| {
317 str::from_utf8(value).ok()
317 str::from_utf8(value).ok()
318 })
318 })
319 }
319 }
320
320
321 /// Returns an `Err` if the first value found is not a valid unsigned
321 /// Returns an `Err` if the first value found is not a valid unsigned
322 /// integer. Otherwise, returns an `Ok(value)` if found, or `None`.
322 /// integer. Otherwise, returns an `Ok(value)` if found, or `None`.
323 pub fn get_u32(
323 pub fn get_u32(
324 &self,
324 &self,
325 section: &[u8],
325 section: &[u8],
326 item: &[u8],
326 item: &[u8],
327 ) -> Result<Option<u32>, ConfigValueParseError> {
327 ) -> Result<Option<u32>, ConfigValueParseError> {
328 self.get_parse(section, item, "valid integer", |value| {
328 self.get_parse(section, item, "valid integer", |value| {
329 str::from_utf8(value).ok()?.parse().ok()
329 str::from_utf8(value).ok()?.parse().ok()
330 })
330 })
331 }
331 }
332
332
333 /// Returns an `Err` if the first value found is not a valid file size
333 /// Returns an `Err` if the first value found is not a valid file size
334 /// value such as `30` (default unit is bytes), `7 MB`, or `42.5 kb`.
334 /// value such as `30` (default unit is bytes), `7 MB`, or `42.5 kb`.
335 /// Otherwise, returns an `Ok(value_in_bytes)` if found, or `None`.
335 /// Otherwise, returns an `Ok(value_in_bytes)` if found, or `None`.
336 pub fn get_byte_size(
336 pub fn get_byte_size(
337 &self,
337 &self,
338 section: &[u8],
338 section: &[u8],
339 item: &[u8],
339 item: &[u8],
340 ) -> Result<Option<u64>, ConfigValueParseError> {
340 ) -> Result<Option<u64>, ConfigValueParseError> {
341 self.get_parse(section, item, "byte quantity", values::parse_byte_size)
341 self.get_parse(section, item, "byte quantity", values::parse_byte_size)
342 }
342 }
343
343
344 /// Returns an `Err` if the first value found is not a valid boolean.
344 /// Returns an `Err` if the first value found is not a valid boolean.
345 /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if
345 /// Otherwise, returns an `Ok(option)`, where `option` is the boolean if
346 /// found, or `None`.
346 /// found, or `None`.
347 pub fn get_option(
347 pub fn get_option(
348 &self,
348 &self,
349 section: &[u8],
349 section: &[u8],
350 item: &[u8],
350 item: &[u8],
351 ) -> Result<Option<bool>, ConfigValueParseError> {
351 ) -> Result<Option<bool>, ConfigValueParseError> {
352 self.get_parse(section, item, "boolean", values::parse_bool)
352 self.get_parse(section, item, "boolean", values::parse_bool)
353 }
353 }
354
354
355 /// Returns the corresponding boolean in the config. Returns `Ok(false)`
355 /// Returns the corresponding boolean in the config. Returns `Ok(false)`
356 /// if the value is not found, an `Err` if it's not a valid boolean.
356 /// if the value is not found, an `Err` if it's not a valid boolean.
357 pub fn get_bool(
357 pub fn get_bool(
358 &self,
358 &self,
359 section: &[u8],
359 section: &[u8],
360 item: &[u8],
360 item: &[u8],
361 ) -> Result<bool, ConfigValueParseError> {
361 ) -> Result<bool, ConfigValueParseError> {
362 Ok(self.get_option(section, item)?.unwrap_or(false))
362 Ok(self.get_option(section, item)?.unwrap_or(false))
363 }
363 }
364
364
365 /// Returns the corresponding list-value in the config if found, or `None`.
365 /// Returns the corresponding list-value in the config if found, or `None`.
366 ///
366 ///
367 /// This is appropriate for new configuration keys. The value syntax is
367 /// This is appropriate for new configuration keys. The value syntax is
368 /// **not** the same as most existing list-valued config, which has Python
368 /// **not** the same as most existing list-valued config, which has Python
369 /// parsing implemented in `parselist()` in
369 /// parsing implemented in `parselist()` in
370 /// `mercurial/utils/stringutil.py`. Faithfully porting that parsing
370 /// `mercurial/utils/stringutil.py`. Faithfully porting that parsing
371 /// algorithm to Rust (including behaviors that are arguably bugs)
371 /// algorithm to Rust (including behaviors that are arguably bugs)
372 /// turned out to be non-trivial and hasn’t been completed as of this
372 /// turned out to be non-trivial and hasn’t been completed as of this
373 /// writing.
373 /// writing.
374 ///
374 ///
375 /// Instead, the "simple" syntax is: split on comma, then trim leading and
375 /// Instead, the "simple" syntax is: split on comma, then trim leading and
376 /// trailing whitespace of each component. Quotes or backslashes are not
376 /// trailing whitespace of each component. Quotes or backslashes are not
377 /// interpreted in any way. Commas are mandatory between values. Values
377 /// interpreted in any way. Commas are mandatory between values. Values
378 /// that contain a comma are not supported.
378 /// that contain a comma are not supported.
379 pub fn get_simple_list(
379 pub fn get_simple_list(
380 &self,
380 &self,
381 section: &[u8],
381 section: &[u8],
382 item: &[u8],
382 item: &[u8],
383 ) -> Option<impl Iterator<Item = &[u8]>> {
383 ) -> Option<impl Iterator<Item = &[u8]>> {
384 self.get(section, item).map(|value| {
384 self.get(section, item).map(|value| {
385 value
385 value
386 .split(|&byte| byte == b',')
386 .split(|&byte| byte == b',')
387 .map(|component| component.trim())
387 .map(|component| component.trim())
388 })
388 })
389 }
389 }
390
390
391 /// If there is an `item` value in `section`, parse and return a list of
392 /// byte strings.
393 pub fn get_list(
394 &self,
395 section: &[u8],
396 item: &[u8],
397 ) -> Option<Vec<Vec<u8>>> {
398 self.get(section, item).map(values::parse_list)
399 }
400
391 /// Returns the raw value bytes of the first one found, or `None`.
401 /// Returns the raw value bytes of the first one found, or `None`.
392 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
402 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
393 self.get_inner(section, item)
403 self.get_inner(section, item)
394 .map(|(_, value)| value.bytes.as_ref())
404 .map(|(_, value)| value.bytes.as_ref())
395 }
405 }
396
406
397 /// Returns the layer and the value of the first one found, or `None`.
407 /// Returns the layer and the value of the first one found, or `None`.
398 fn get_inner(
408 fn get_inner(
399 &self,
409 &self,
400 section: &[u8],
410 section: &[u8],
401 item: &[u8],
411 item: &[u8],
402 ) -> Option<(&ConfigLayer, &ConfigValue)> {
412 ) -> Option<(&ConfigLayer, &ConfigValue)> {
403 for layer in self.layers.iter().rev() {
413 for layer in self.layers.iter().rev() {
404 if !layer.trusted {
414 if !layer.trusted {
405 continue;
415 continue;
406 }
416 }
407 if let Some(v) = layer.get(&section, &item) {
417 if let Some(v) = layer.get(&section, &item) {
408 return Some((&layer, v));
418 return Some((&layer, v));
409 }
419 }
410 }
420 }
411 None
421 None
412 }
422 }
413
423
414 /// Return all keys defined for the given section
424 /// Return all keys defined for the given section
415 pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> {
425 pub fn get_section_keys(&self, section: &[u8]) -> HashSet<&[u8]> {
416 self.layers
426 self.layers
417 .iter()
427 .iter()
418 .flat_map(|layer| layer.iter_keys(section))
428 .flat_map(|layer| layer.iter_keys(section))
419 .collect()
429 .collect()
420 }
430 }
421
431
422 /// Get raw values bytes from all layers (even untrusted ones) in order
432 /// Get raw values bytes from all layers (even untrusted ones) in order
423 /// of precedence.
433 /// of precedence.
424 #[cfg(test)]
434 #[cfg(test)]
425 fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
435 fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
426 let mut res = vec![];
436 let mut res = vec![];
427 for layer in self.layers.iter().rev() {
437 for layer in self.layers.iter().rev() {
428 if let Some(v) = layer.get(&section, &item) {
438 if let Some(v) = layer.get(&section, &item) {
429 res.push(v.bytes.as_ref());
439 res.push(v.bytes.as_ref());
430 }
440 }
431 }
441 }
432 res
442 res
433 }
443 }
434 }
444 }
435
445
436 #[cfg(test)]
446 #[cfg(test)]
437 mod tests {
447 mod tests {
438 use super::*;
448 use super::*;
439 use pretty_assertions::assert_eq;
449 use pretty_assertions::assert_eq;
440 use std::fs::File;
450 use std::fs::File;
441 use std::io::Write;
451 use std::io::Write;
442
452
443 #[test]
453 #[test]
444 fn test_include_layer_ordering() {
454 fn test_include_layer_ordering() {
445 let tmpdir = tempfile::tempdir().unwrap();
455 let tmpdir = tempfile::tempdir().unwrap();
446 let tmpdir_path = tmpdir.path();
456 let tmpdir_path = tmpdir.path();
447 let mut included_file =
457 let mut included_file =
448 File::create(&tmpdir_path.join("included.rc")).unwrap();
458 File::create(&tmpdir_path.join("included.rc")).unwrap();
449
459
450 included_file.write_all(b"[section]\nitem=value1").unwrap();
460 included_file.write_all(b"[section]\nitem=value1").unwrap();
451 let base_config_path = tmpdir_path.join("base.rc");
461 let base_config_path = tmpdir_path.join("base.rc");
452 let mut config_file = File::create(&base_config_path).unwrap();
462 let mut config_file = File::create(&base_config_path).unwrap();
453 let data =
463 let data =
454 b"[section]\nitem=value0\n%include included.rc\nitem=value2\n\
464 b"[section]\nitem=value0\n%include included.rc\nitem=value2\n\
455 [section2]\ncount = 4\nsize = 1.5 KB\nnot-count = 1.5\nnot-size = 1 ub";
465 [section2]\ncount = 4\nsize = 1.5 KB\nnot-count = 1.5\nnot-size = 1 ub";
456 config_file.write_all(data).unwrap();
466 config_file.write_all(data).unwrap();
457
467
458 let sources = vec![ConfigSource::AbsPath(base_config_path)];
468 let sources = vec![ConfigSource::AbsPath(base_config_path)];
459 let config = Config::load_from_explicit_sources(sources)
469 let config = Config::load_from_explicit_sources(sources)
460 .expect("expected valid config");
470 .expect("expected valid config");
461
471
462 let (_, value) = config.get_inner(b"section", b"item").unwrap();
472 let (_, value) = config.get_inner(b"section", b"item").unwrap();
463 assert_eq!(
473 assert_eq!(
464 value,
474 value,
465 &ConfigValue {
475 &ConfigValue {
466 bytes: b"value2".to_vec(),
476 bytes: b"value2".to_vec(),
467 line: Some(4)
477 line: Some(4)
468 }
478 }
469 );
479 );
470
480
471 let value = config.get(b"section", b"item").unwrap();
481 let value = config.get(b"section", b"item").unwrap();
472 assert_eq!(value, b"value2",);
482 assert_eq!(value, b"value2",);
473 assert_eq!(
483 assert_eq!(
474 config.get_all(b"section", b"item"),
484 config.get_all(b"section", b"item"),
475 [b"value2", b"value1", b"value0"]
485 [b"value2", b"value1", b"value0"]
476 );
486 );
477
487
478 assert_eq!(config.get_u32(b"section2", b"count").unwrap(), Some(4));
488 assert_eq!(config.get_u32(b"section2", b"count").unwrap(), Some(4));
479 assert_eq!(
489 assert_eq!(
480 config.get_byte_size(b"section2", b"size").unwrap(),
490 config.get_byte_size(b"section2", b"size").unwrap(),
481 Some(1024 + 512)
491 Some(1024 + 512)
482 );
492 );
483 assert!(config.get_u32(b"section2", b"not-count").is_err());
493 assert!(config.get_u32(b"section2", b"not-count").is_err());
484 assert!(config.get_byte_size(b"section2", b"not-size").is_err());
494 assert!(config.get_byte_size(b"section2", b"not-size").is_err());
485 }
495 }
486 }
496 }
@@ -1,61 +1,273 b''
1 //! Parsing functions for various types of configuration values.
1 //! Parsing functions for various types of configuration values.
2 //!
2 //!
3 //! Returning `None` indicates a syntax error. Using a `Result` would be more
3 //! Returning `None` indicates a syntax error. Using a `Result` would be more
4 //! correct but would take more boilerplate for converting between error types,
4 //! correct but would take more boilerplate for converting between error types,
5 //! compared to using `.ok()` on inner results of various error types to
5 //! compared to using `.ok()` on inner results of various error types to
6 //! convert them all to options. The `Config::get_parse` method later converts
6 //! convert them all to options. The `Config::get_parse` method later converts
7 //! those options to results with `ConfigValueParseError`, which contains
7 //! those options to results with `ConfigValueParseError`, which contains
8 //! details about where the value came from (but omits details of what’s
8 //! details about where the value came from (but omits details of what’s
9 //! invalid inside the value).
9 //! invalid inside the value).
10
10
11 use crate::utils::SliceExt;
12
11 pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {
13 pub(super) fn parse_bool(v: &[u8]) -> Option<bool> {
12 match v.to_ascii_lowercase().as_slice() {
14 match v.to_ascii_lowercase().as_slice() {
13 b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true),
15 b"1" | b"yes" | b"true" | b"on" | b"always" => Some(true),
14 b"0" | b"no" | b"false" | b"off" | b"never" => Some(false),
16 b"0" | b"no" | b"false" | b"off" | b"never" => Some(false),
15 _ => None,
17 _ => None,
16 }
18 }
17 }
19 }
18
20
19 pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> {
21 pub(super) fn parse_byte_size(value: &[u8]) -> Option<u64> {
20 let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase();
22 let value = std::str::from_utf8(value).ok()?.to_ascii_lowercase();
21 const UNITS: &[(&str, u64)] = &[
23 const UNITS: &[(&str, u64)] = &[
22 ("g", 1 << 30),
24 ("g", 1 << 30),
23 ("gb", 1 << 30),
25 ("gb", 1 << 30),
24 ("m", 1 << 20),
26 ("m", 1 << 20),
25 ("mb", 1 << 20),
27 ("mb", 1 << 20),
26 ("k", 1 << 10),
28 ("k", 1 << 10),
27 ("kb", 1 << 10),
29 ("kb", 1 << 10),
28 ("b", 1 << 0), // Needs to be last
30 ("b", 1 << 0), // Needs to be last
29 ];
31 ];
30 for &(unit, multiplier) in UNITS {
32 for &(unit, multiplier) in UNITS {
31 // TODO: use `value.strip_suffix(unit)` when we require Rust 1.45+
33 // TODO: use `value.strip_suffix(unit)` when we require Rust 1.45+
32 if value.ends_with(unit) {
34 if value.ends_with(unit) {
33 let value_before_unit = &value[..value.len() - unit.len()];
35 let value_before_unit = &value[..value.len() - unit.len()];
34 let float: f64 = value_before_unit.trim().parse().ok()?;
36 let float: f64 = value_before_unit.trim().parse().ok()?;
35 if float >= 0.0 {
37 if float >= 0.0 {
36 return Some((float * multiplier as f64).round() as u64);
38 return Some((float * multiplier as f64).round() as u64);
37 } else {
39 } else {
38 return None;
40 return None;
39 }
41 }
40 }
42 }
41 }
43 }
42 value.parse().ok()
44 value.parse().ok()
43 }
45 }
44
46
47 /// Parse a config value as a list of sub-values.
48 ///
49 /// Ported from `parselist` in `mercurial/utils/stringutil.py`
50
51 // Note: keep behavior in sync with the Python one.
52
53 // Note: this could return `Vec<Cow<[u8]>>` instead and borrow `input` when
54 // possible (when there’s no backslash-escapes) but this is probably not worth
55 // the complexity as config is presumably not accessed inside
56 // preformance-sensitive loops.
57 pub(super) fn parse_list(input: &[u8]) -> Vec<Vec<u8>> {
58 // Port of Python’s `value.lstrip(b' ,\n')`
59 // TODO: is this really what we want?
60 let input =
61 input.trim_start_matches(|b| b == b' ' || b == b',' || b == b'\n');
62 parse_list_without_trim_start(input)
63 }
64
65 fn parse_list_without_trim_start(input: &[u8]) -> Vec<Vec<u8>> {
66 // Start of port of Python’s `_configlist`
67 let input = input.trim_end_matches(|b| b == b' ' || b == b',');
68 if input.is_empty() {
69 return Vec::new();
70 }
71
72 // Just to make “a string” less confusable with “a list of strings”.
73 type ByteString = Vec<u8>;
74
75 // These correspond to Python’s…
76 let mut mode = ParserMode::Plain; // `parser`
77 let mut values = Vec::new(); // `parts[:-1]`
78 let mut next_value = ByteString::new(); // `parts[-1]`
79 let mut offset = 0; // `offset`
80
81 // Setting `parser` to `None` is instead handled by returning immediately
82 enum ParserMode {
83 Plain,
84 Quoted,
85 }
86
87 loop {
88 match mode {
89 ParserMode::Plain => {
90 // Start of port of Python’s `_parse_plain`
91 let mut whitespace = false;
92 while let Some(&byte) = input.get(offset) {
93 if is_space(byte) || byte == b',' {
94 whitespace = true;
95 offset += 1;
96 } else {
97 break;
98 }
99 }
100 if let Some(&byte) = input.get(offset) {
101 if whitespace {
102 values.push(std::mem::take(&mut next_value))
103 }
104 if byte == b'"' && next_value.is_empty() {
105 mode = ParserMode::Quoted;
106 } else {
107 if byte == b'"' && next_value.ends_with(b"\\") {
108 next_value.pop();
109 }
110 next_value.push(byte);
111 }
112 offset += 1;
113 } else {
114 values.push(next_value);
115 return values;
116 }
117 }
118 ParserMode::Quoted => {
119 // Start of port of Python’s `_parse_quote`
120 if let Some(&byte) = input.get(offset) {
121 if byte == b'"' {
122 // The input contains a quoted zero-length value `""`
123 debug_assert_eq!(next_value, b"");
124 values.push(std::mem::take(&mut next_value));
125 offset += 1;
126 while let Some(&byte) = input.get(offset) {
127 if is_space(byte) || byte == b',' {
128 offset += 1;
129 } else {
130 break;
131 }
132 }
133 mode = ParserMode::Plain;
134 continue;
135 }
136 }
137
138 while let Some(&byte) = input.get(offset) {
139 if byte == b'"' {
140 break;
141 }
142 if byte == b'\\' && input.get(offset + 1) == Some(&b'"') {
143 next_value.push(b'"');
144 offset += 2;
145 } else {
146 next_value.push(byte);
147 offset += 1;
148 }
149 }
150
151 if offset >= input.len() {
152 // We didn’t find a closing double-quote,
153 // so treat the opening one as part of an unquoted value
154 // instead of delimiting the start of a quoted value.
155
156 // `next_value` may have had some backslash-escapes
157 // unescaped. TODO: shouldn’t we use a slice of `input`
158 // instead?
159 let mut real_values =
160 parse_list_without_trim_start(&next_value);
161
162 if let Some(first) = real_values.first_mut() {
163 first.insert(0, b'"');
164 // Drop `next_value`
165 values.extend(real_values)
166 } else {
167 next_value.push(b'"');
168 values.push(next_value);
169 }
170 return values;
171 }
172
173 // We’re not at the end of the input, which means the `while`
174 // loop above ended at at double quote. Skip
175 // over that.
176 offset += 1;
177
178 while let Some(&byte) = input.get(offset) {
179 if byte == b' ' || byte == b',' {
180 offset += 1;
181 } else {
182 break;
183 }
184 }
185
186 if offset >= input.len() {
187 values.push(next_value);
188 return values;
189 }
190
191 if offset + 1 == input.len() && input[offset] == b'"' {
192 next_value.push(b'"');
193 offset += 1;
194 } else {
195 values.push(std::mem::take(&mut next_value));
196 }
197
198 mode = ParserMode::Plain;
199 }
200 }
201 }
202
203 // https://docs.python.org/3/library/stdtypes.html?#bytes.isspace
204 fn is_space(byte: u8) -> bool {
205 if let b' ' | b'\t' | b'\n' | b'\r' | b'\x0b' | b'\x0c' = byte {
206 true
207 } else {
208 false
209 }
210 }
211 }
212
#[test]
fn test_parse_list() {
    // Compare as strings so that `assert_eq` failures are readable
    fn readable(items: &[Vec<u8>]) -> Vec<String> {
        items
            .iter()
            .map(|item| String::from_utf8(item.clone()).unwrap())
            .collect()
    }

    fn check(input: &[u8], expected: &[&[u8]]) {
        let expected: Vec<Vec<u8>> =
            expected.iter().map(|item| item.to_vec()).collect();
        assert_eq!(readable(&parse_list(input)), readable(&expected));
    }

    // Keep these Rust tests in sync with the Python ones in
    // `tests/test-config-parselist.py`
    check(b"", &[]);
    check(b",", &[]);
    check(b"A", &[b"A"]);
    check(b"B,B", &[b"B", b"B"]);
    check(b", C, ,C,", &[b"C", b"C"]);
    check(b"\"", &[b"\""]);
    check(b"\"\"", &[b"", b""]);
    check(b"D,\"", &[b"D", b"\""]);
    check(b"E,\"\"", &[b"E", b"", b""]);
    check(b"\"F,F\"", &[b"F,F"]);
    check(b"\"G,G", &[b"\"G", b"G"]);
    check(b"\"H \\\",\\\"H", &[b"\"H", b",", b"H"]);
    check(b"I,I\"", &[b"I", b"I\""]);
    check(b"J,\"J", &[b"J", b"\"J"]);
    check(b"K K", &[b"K", b"K"]);
    check(b"\"K\" K", &[b"K", b"K"]);
    check(b"L\tL", &[b"L", b"L"]);
    check(b"\"L\"\tL", &[b"L", b"", b"L"]);
    check(b"M\x0bM", &[b"M", b"M"]);
    check(b"\"M\"\x0bM", &[b"M", b"", b"M"]);
    check(b"\"N\" , ,\"", &[b"N\""]);
    check(b"\" ,O, ", &[b"\"", b"O"]);
}
256
#[test]
fn test_parse_byte_size() {
    fn check(input: &[u8], expected: Option<u64>) {
        assert_eq!(parse_byte_size(input), expected);
    }

    // A number is required, a unit alone is not enough
    check(b"", None);
    check(b"b", None);

    // Plain integers, units in any case, optional space, fractions
    check(b"12", Some(12));
    check(b"12b", Some(12));
    check(b"12 b", Some(12));
    check(b"12.1 b", Some(12));
    check(b"1.1 K", Some(1126));
    check(b"1.1 kB", Some(1126));

    // Negative sizes are rejected, fractions are rounded
    check(b"-12 b", None);
    check(b"-0.1 b", None);
    check(b"0.1 b", Some(0));
    check(b"12.1 b", Some(12));
}
General Comments 0
You need to be logged in to leave comments. Login now