##// END OF EJS Templates
rhg: Add support for environment variables in config include paths...
Simon Sapin -
r47476:91ab5190 default
parent child Browse files
Show More
@@ -1,310 +1,311
1 1 // layer.rs
2 2 //
3 3 // Copyright 2020
4 4 // Valentin Gatien-Baron,
5 5 // Raphaël Gomès <rgomes@octobus.net>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 use crate::errors::HgError;
11 11 use crate::utils::files::{get_bytes_from_path, get_path_from_bytes};
12 12 use format_bytes::{format_bytes, write_bytes, DisplayBytes};
13 13 use lazy_static::lazy_static;
14 14 use regex::bytes::Regex;
15 15 use std::collections::HashMap;
16 16 use std::path::{Path, PathBuf};
17 17
lazy_static! {
    /// A section header such as `[ui]`; group 1 is the section name
    static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
    /// An `item = value` line; group 1 is the item name and group 2 the
    /// value without its surrounding whitespace
    static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
    /// Continuation whitespace
    static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
    /// A line that starts with a comment marker or only contains whitespace
    static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
    /// A line that starts with a comment marker (`;` or `#`)
    static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
    /// A directive that allows for removing previous entries
    static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
    /// A directive that allows for including other config files
    static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
}
30 30
31 31 /// All config values separated by layers of precedence.
32 32 /// Each config source may be split in multiple layers if `%include` directives
33 33 /// are used.
34 34 /// TODO detail the general precedence
35 35 #[derive(Clone)]
36 36 pub struct ConfigLayer {
37 37 /// Mapping of the sections to their items
38 38 sections: HashMap<Vec<u8>, ConfigItem>,
39 39 /// All sections (and their items/values) in a layer share the same origin
40 40 pub origin: ConfigOrigin,
41 41 /// Whether this layer comes from a trusted user or group
42 42 pub trusted: bool,
43 43 }
44 44
45 45 impl ConfigLayer {
46 46 pub fn new(origin: ConfigOrigin) -> Self {
47 47 ConfigLayer {
48 48 sections: HashMap::new(),
49 49 trusted: true, // TODO check
50 50 origin,
51 51 }
52 52 }
53 53
54 54 /// Parse `--config` CLI arguments and return a layer if there’s any
55 55 pub(crate) fn parse_cli_args(
56 56 cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>,
57 57 ) -> Result<Option<Self>, ConfigError> {
58 58 fn parse_one(arg: &[u8]) -> Option<(Vec<u8>, Vec<u8>, Vec<u8>)> {
59 59 use crate::utils::SliceExt;
60 60
61 61 let (section_and_item, value) = arg.split_2(b'=')?;
62 62 let (section, item) = section_and_item.trim().split_2(b'.')?;
63 63 Some((
64 64 section.to_owned(),
65 65 item.to_owned(),
66 66 value.trim().to_owned(),
67 67 ))
68 68 }
69 69
70 70 let mut layer = Self::new(ConfigOrigin::CommandLine);
71 71 for arg in cli_config_args {
72 72 let arg = arg.as_ref();
73 73 if let Some((section, item, value)) = parse_one(arg) {
74 74 layer.add(section, item, value, None);
75 75 } else {
76 76 Err(HgError::abort(format!(
77 77 "abort: malformed --config option: '{}' \
78 78 (use --config section.name=value)",
79 79 String::from_utf8_lossy(arg),
80 80 )))?
81 81 }
82 82 }
83 83 if layer.sections.is_empty() {
84 84 Ok(None)
85 85 } else {
86 86 Ok(Some(layer))
87 87 }
88 88 }
89 89
90 90 /// Returns whether this layer comes from `--config` CLI arguments
91 91 pub(crate) fn is_from_command_line(&self) -> bool {
92 92 if let ConfigOrigin::CommandLine = self.origin {
93 93 true
94 94 } else {
95 95 false
96 96 }
97 97 }
98 98
99 99 /// Add an entry to the config, overwriting the old one if already present.
100 100 pub fn add(
101 101 &mut self,
102 102 section: Vec<u8>,
103 103 item: Vec<u8>,
104 104 value: Vec<u8>,
105 105 line: Option<usize>,
106 106 ) {
107 107 self.sections
108 108 .entry(section)
109 109 .or_insert_with(|| HashMap::new())
110 110 .insert(item, ConfigValue { bytes: value, line });
111 111 }
112 112
113 113 /// Returns the config value in `<section>.<item>` if it exists
114 114 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
115 115 Some(self.sections.get(section)?.get(item)?)
116 116 }
117 117
118 118 /// Returns the keys defined in the given section
119 119 pub fn iter_keys(&self, section: &[u8]) -> impl Iterator<Item = &[u8]> {
120 120 self.sections
121 121 .get(section)
122 122 .into_iter()
123 123 .flat_map(|section| section.keys().map(|vec| &**vec))
124 124 }
125 125
126 126 pub fn is_empty(&self) -> bool {
127 127 self.sections.is_empty()
128 128 }
129 129
130 130 /// Returns a `Vec` of layers in order of precedence (so, in read order),
131 131 /// recursively parsing the `%include` directives if any.
132 132 pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
133 133 let mut layers = vec![];
134 134
135 135 // Discard byte order mark if any
136 136 let data = if data.starts_with(b"\xef\xbb\xbf") {
137 137 &data[3..]
138 138 } else {
139 139 data
140 140 };
141 141
142 142 // TODO check if it's trusted
143 143 let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
144 144
145 145 let mut lines_iter =
146 146 data.split(|b| *b == b'\n').enumerate().peekable();
147 147 let mut section = b"".to_vec();
148 148
149 149 while let Some((index, bytes)) = lines_iter.next() {
150 150 let line = Some(index + 1);
151 151 if let Some(m) = INCLUDE_RE.captures(&bytes) {
152 152 let filename_bytes = &m[1];
153 let filename_bytes = crate::utils::expand_vars(filename_bytes);
153 154 // `Path::parent` only fails for the root directory,
154 155 // which `src` can’t be since we’ve managed to open it as a
155 156 // file.
156 157 let dir = src
157 158 .parent()
158 159 .expect("Path::parent fail on a file we’ve read");
159 160 // `Path::join` with an absolute argument correctly ignores the
160 161 // base path
161 162 let filename = dir.join(&get_path_from_bytes(&filename_bytes));
162 163 let data = std::fs::read(&filename).map_err(|io_error| {
163 164 ConfigParseError {
164 165 origin: ConfigOrigin::File(src.to_owned()),
165 166 line,
166 167 message: format_bytes!(
167 168 b"cannot include {} ({})",
168 169 filename_bytes,
169 170 format_bytes::Utf8(io_error)
170 171 ),
171 172 }
172 173 })?;
173 174 layers.push(current_layer);
174 175 layers.extend(Self::parse(&filename, &data)?);
175 176 current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
176 177 } else if let Some(_) = EMPTY_RE.captures(&bytes) {
177 178 } else if let Some(m) = SECTION_RE.captures(&bytes) {
178 179 section = m[1].to_vec();
179 180 } else if let Some(m) = ITEM_RE.captures(&bytes) {
180 181 let item = m[1].to_vec();
181 182 let mut value = m[2].to_vec();
182 183 loop {
183 184 match lines_iter.peek() {
184 185 None => break,
185 186 Some((_, v)) => {
186 187 if let Some(_) = COMMENT_RE.captures(&v) {
187 188 } else if let Some(_) = CONT_RE.captures(&v) {
188 189 value.extend(b"\n");
189 190 value.extend(&m[1]);
190 191 } else {
191 192 break;
192 193 }
193 194 }
194 195 };
195 196 lines_iter.next();
196 197 }
197 198 current_layer.add(section.clone(), item, value, line);
198 199 } else if let Some(m) = UNSET_RE.captures(&bytes) {
199 200 if let Some(map) = current_layer.sections.get_mut(&section) {
200 201 map.remove(&m[1]);
201 202 }
202 203 } else {
203 204 let message = if bytes.starts_with(b" ") {
204 205 format_bytes!(b"unexpected leading whitespace: {}", bytes)
205 206 } else {
206 207 bytes.to_owned()
207 208 };
208 209 return Err(ConfigParseError {
209 210 origin: ConfigOrigin::File(src.to_owned()),
210 211 line,
211 212 message,
212 213 }
213 214 .into());
214 215 }
215 216 }
216 217 if !current_layer.is_empty() {
217 218 layers.push(current_layer);
218 219 }
219 220 Ok(layers)
220 221 }
221 222 }
222 223
223 224 impl DisplayBytes for ConfigLayer {
224 225 fn display_bytes(
225 226 &self,
226 227 out: &mut dyn std::io::Write,
227 228 ) -> std::io::Result<()> {
228 229 let mut sections: Vec<_> = self.sections.iter().collect();
229 230 sections.sort_by(|e0, e1| e0.0.cmp(e1.0));
230 231
231 232 for (section, items) in sections.into_iter() {
232 233 let mut items: Vec<_> = items.into_iter().collect();
233 234 items.sort_by(|e0, e1| e0.0.cmp(e1.0));
234 235
235 236 for (item, config_entry) in items {
236 237 write_bytes!(
237 238 out,
238 239 b"{}.{}={} # {}\n",
239 240 section,
240 241 item,
241 242 &config_entry.bytes,
242 243 &self.origin,
243 244 )?
244 245 }
245 246 }
246 247 Ok(())
247 248 }
248 249 }
249 250
250 251 /// Mapping of section item to value.
251 252 /// In the following:
252 253 /// ```text
253 254 /// [ui]
254 255 /// paginate=no
255 256 /// ```
256 257 /// "paginate" is the section item and "no" the value.
257 258 pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
258 259
/// A single config value together with the line it was defined on, if any.
#[derive(Clone, Debug, PartialEq)]
pub struct ConfigValue {
    /// The raw bytes of the value (be it from the CLI, env or from a file)
    pub bytes: Vec<u8>,
    /// Only present if the value comes from a file, 1-indexed.
    pub line: Option<usize>,
}
266 267
267 268 #[derive(Clone, Debug)]
268 269 pub enum ConfigOrigin {
269 270 /// From a configuration file
270 271 File(PathBuf),
271 272 /// From a `--config` CLI argument
272 273 CommandLine,
273 274 /// From environment variables like `$PAGER` or `$EDITOR`
274 275 Environment(Vec<u8>),
275 276 /* TODO cli
276 277 * TODO defaults (configitems.py)
277 278 * TODO extensions
278 279 * TODO Python resources?
279 280 * Others? */
280 281 }
281 282
282 283 impl DisplayBytes for ConfigOrigin {
283 284 fn display_bytes(
284 285 &self,
285 286 out: &mut dyn std::io::Write,
286 287 ) -> std::io::Result<()> {
287 288 match self {
288 289 ConfigOrigin::File(p) => out.write_all(&get_bytes_from_path(p)),
289 290 ConfigOrigin::CommandLine => out.write_all(b"--config"),
290 291 ConfigOrigin::Environment(e) => write_bytes!(out, b"${}", e),
291 292 }
292 293 }
293 294 }
294 295
/// An error encountered while parsing a config source.
#[derive(Debug)]
pub struct ConfigParseError {
    /// The config source that was being parsed
    pub origin: ConfigOrigin,
    /// The 1-indexed line the error was found on, if known
    pub line: Option<usize>,
    /// Description of the problem, as raw bytes
    pub message: Vec<u8>,
}
301 302
/// Any error that reading the configuration can produce.
/// `derive_more::From` provides the conversions from each wrapped type.
#[derive(Debug, derive_more::From)]
pub enum ConfigError {
    /// A config source could not be parsed
    Parse(ConfigParseError),
    /// Any other `HgError`
    Other(HgError),
}
307 308
308 309 fn make_regex(pattern: &'static str) -> Regex {
309 310 Regex::new(pattern).expect("expected a valid regex")
310 311 }
@@ -1,430 +1,483
1 1 // utils module
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Contains useful functions, traits, structs, etc. for use in core.
9 9
10 10 use crate::errors::{HgError, IoErrorContext};
11 11 use crate::utils::hg_path::HgPath;
12 12 use im_rc::ordmap::DiffItem;
13 13 use im_rc::ordmap::OrdMap;
14 14 use std::cell::Cell;
15 15 use std::fmt;
16 16 use std::{io::Write, ops::Deref};
17 17
18 18 pub mod files;
19 19 pub mod hg_path;
20 20 pub mod path_auditor;
21 21
/// Useful until rust/issues/56345 is stable
///
/// Returns the index of the first occurrence of `needle` in `slice`, if any.
/// An empty `needle` is found at index 0 of any slice.
///
/// # Examples
///
/// ```
/// use crate::hg::utils::find_slice_in_slice;
///
/// let haystack = b"This is the haystack".to_vec();
/// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
/// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
/// ```
pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    if needle.is_empty() {
        // `windows(0)` panics, so answer directly for the empty needle
        return Some(0);
    }
    slice
        .windows(needle.len())
        .position(|window| window == needle)
}
41 41
/// Overwrites, in place, every occurrence of `from` in `buf` with `to`.
///
/// Nothing happens unless `from` and `to` have the same length, since the
/// buffer cannot grow or shrink.
///
/// # Examples
///
/// ```
/// use crate::hg::utils::replace_slice;
/// let mut line = b"I hate writing tests!".to_vec();
/// replace_slice(&mut line, b"hate", b"love");
/// assert_eq!(
///     line,
///     b"I love writing tests!".to_vec()
/// );
/// ```
pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
where
    T: Clone + PartialEq,
{
    let width = from.len();
    if width != to.len() || width > buf.len() {
        return;
    }
    let mut start = 0;
    while start + width <= buf.len() {
        let end = start + width;
        if buf[start..end] == *from {
            buf[start..end].clone_from_slice(to);
        }
        start += 1;
    }
}
68 68
/// Byte-string helpers that the standard library does not provide on slices
pub trait SliceExt {
    /// Removes all trailing `\n` bytes
    fn trim_end_newlines(&self) -> &Self;
    /// Removes trailing ASCII whitespace
    fn trim_end(&self) -> &Self;
    /// Removes leading ASCII whitespace
    fn trim_start(&self) -> &Self;
    /// Removes leading and trailing ASCII whitespace
    fn trim(&self) -> &Self;
    /// Returns what follows `needle` if `self` starts with it
    fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
    /// Splits around the first `separator`, if there is one
    fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
}

/// Returns whether the byte is anything but ASCII whitespace
/// (space, `\t`, `\n`, vertical tab, form feed, `\r`).
///
/// The byte is deliberately not converted to a `char`: that would make
/// 0x85 and 0xA0 count as whitespace (U+0085 and U+00A0), and both are
/// valid UTF-8 continuation bytes. Trimming them cuts multi-byte characters
/// in half, for example `à` which is encoded as `C3 A0`.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_not_whitespace(c: &u8) -> bool {
    // TODO: Use the `matches!` macro when we require Rust 1.42+
    match *c {
        b' ' | b'\t' | b'\n' | 0x0b | 0x0c | b'\r' => false,
        _ => true,
    }
}

impl SliceExt for [u8] {
    fn trim_end_newlines(&self) -> &[u8] {
        match self.iter().rposition(|&byte| byte != b'\n') {
            Some(last) => &self[..=last],
            None => &[],
        }
    }
    fn trim_end(&self) -> &[u8] {
        match self.iter().rposition(is_not_whitespace) {
            Some(last) => &self[..=last],
            None => &[],
        }
    }
    fn trim_start(&self) -> &[u8] {
        match self.iter().position(is_not_whitespace) {
            Some(first) => &self[first..],
            None => &[],
        }
    }

    /// ```
    /// use hg::utils::SliceExt;
    /// assert_eq!(
    ///     b"  to trim  ".trim(),
    ///     b"to trim"
    /// );
    /// assert_eq!(
    ///     b"to trim  ".trim(),
    ///     b"to trim"
    /// );
    /// assert_eq!(
    ///     b"  to trim".trim(),
    ///     b"to trim"
    /// );
    /// ```
    fn trim(&self) -> &[u8] {
        self.trim_start().trim_end()
    }

    fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
        if self.starts_with(needle) {
            Some(&self[needle.len()..])
        } else {
            None
        }
    }

    fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
        // At most two pieces: everything after the first separator is kept
        // together, further separators included.
        let mut iter = self.splitn(2, |&byte| byte == separator);
        let a = iter.next()?;
        let b = iter.next()?;
        Some((a, b))
    }
}
140 140
pub trait Escaped {
    /// Return bytes escaped for display to the user
    fn escaped_bytes(&self) -> Vec<u8>;
}

/// Escapes a single byte: quotes and backslashes get a backslash prefix,
/// tab/newline/carriage return get a textual form, other bytes below
/// space or from 127 up are written as `\x` followed by their hex value.
impl Escaped for u8 {
    fn escaped_bytes(&self) -> Vec<u8> {
        let byte = *self;
        let mut escaped = Vec::new();
        match byte {
            b'\'' | b'\\' => escaped.extend_from_slice(&[b'\\', byte]),
            b'\t' => escaped.extend_from_slice(br"\\t"),
            b'\n' => escaped.extend_from_slice(br"\\n"),
            b'\r' => escaped.extend_from_slice(br"\\r"),
            _ if byte < b' ' || byte >= 127 => {
                write!(escaped, "\\x{:x}", byte).unwrap();
            }
            _ => escaped.push(byte),
        }
        escaped
    }
}

/// Escapes every element and concatenates the results
impl<'a, T: Escaped> Escaped for &'a [T] {
    fn escaped_bytes(&self) -> Vec<u8> {
        let mut escaped = Vec::new();
        for element in self.iter() {
            escaped.extend(element.escaped_bytes());
        }
        escaped
    }
}

/// Same as the slice implementation
impl<T: Escaped> Escaped for Vec<T> {
    fn escaped_bytes(&self) -> Vec<u8> {
        let elements = self.deref();
        elements.escaped_bytes()
    }
}
185 185
186 186 impl<'a> Escaped for &'a HgPath {
187 187 fn escaped_bytes(&self) -> Vec<u8> {
188 188 self.as_bytes().escaped_bytes()
189 189 }
190 190 }
191 191
// TODO: use the str method when we require Rust 1.45
/// Returns `s` without `suffix`, or `None` if `s` does not end with it.
pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
    if !s.ends_with(suffix) {
        return None;
    }
    let kept = s.len() - suffix.len();
    Some(&s[..kept])
}
200 200
/// Quotes `value` so that a POSIX shell reads it back as a single word.
///
/// Non-empty values made only of ASCII letters, digits and `._/+-` are
/// returned unchanged. Anything else is wrapped in single quotes.
///
/// Inside single quotes a backslash is literal and nothing can escape a
/// quote, so each embedded `'` is written as `'\''`: close the quotes, add
/// an escaped quote, open them again. The empty string is quoted as `''`
/// so that it does not vanish from a command line.
#[cfg(unix)]
pub fn shell_quote(value: &[u8]) -> Vec<u8> {
    // TODO: Use the `matches!` macro when we require Rust 1.42+
    fn is_safe(byte: u8) -> bool {
        match byte {
            b'a'..=b'z'
            | b'A'..=b'Z'
            | b'0'..=b'9'
            | b'.'
            | b'_'
            | b'/'
            | b'+'
            | b'-' => true,
            _ => false,
        }
    }

    if !value.is_empty() && value.iter().all(|&byte| is_safe(byte)) {
        return value.to_owned();
    }

    let mut quoted = Vec::with_capacity(value.len() + 2);
    quoted.push(b'\'');
    for &byte in value {
        if byte == b'\'' {
            quoted.extend_from_slice(br"'\''");
        } else {
            quoted.push(byte);
        }
    }
    quoted.push(b'\'');
    quoted
}
229 229
230 230 pub fn current_dir() -> Result<std::path::PathBuf, HgError> {
231 231 std::env::current_dir().map_err(|error| HgError::IoError {
232 232 error,
233 233 context: IoErrorContext::CurrentDir,
234 234 })
235 235 }
236 236
237 237 pub fn current_exe() -> Result<std::path::PathBuf, HgError> {
238 238 std::env::current_exe().map_err(|error| HgError::IoError {
239 239 error,
240 240 context: IoErrorContext::CurrentExe,
241 241 })
242 242 }
243 243
244 /// Expand `$FOO` and `${FOO}` environment variables in the given byte string
245 pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> {
246 lazy_static::lazy_static! {
247 /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301
248 /// The `x` makes whitespace ignored.
249 /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag.
250 static ref VAR_RE: regex::bytes::Regex =
251 regex::bytes::Regex::new(r"(?x-u)
252 \$
253 (?:
254 (\w+)
255 |
256 \{
257 ([^}]*)
258 \}
259 )
260 ").unwrap();
261 }
262 VAR_RE.replace_all(s, |captures: &regex::bytes::Captures| {
263 let var_name = files::get_os_str_from_bytes(
264 captures
265 .get(1)
266 .or_else(|| captures.get(2))
267 .expect("either side of `|` must participate in match")
268 .as_bytes(),
269 );
270 std::env::var_os(var_name)
271 .map(files::get_bytes_from_os_str)
272 .unwrap_or_else(|| {
273 // Referencing an environment variable that does not exist.
274 // Leave the $FOO reference as-is.
275 captures[0].to_owned()
276 })
277 })
278 }
279
280 #[test]
281 fn test_expand_vars() {
282 // Modifying process-global state in a test isn’t great,
283 // but hopefully this won’t collide with anything.
284 std::env::set_var("TEST_EXPAND_VAR", "1");
285 assert_eq!(
286 expand_vars(b"before/$TEST_EXPAND_VAR/after"),
287 &b"before/1/after"[..]
288 );
289 assert_eq!(
290 expand_vars(b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after"),
291 &b"before111after"[..]
292 );
293 let s = b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after";
294 assert_eq!(expand_vars(s), &s[..]);
295 }
296
/// What a `merge` callback decides for a key that exists in both maps
pub(crate) enum MergeResult<V> {
    /// Keep the value of the left map
    UseLeftValue,
    /// Keep the value of the right map
    UseRightValue,
    /// Use this value instead of either existing one
    UseNewValue(V),
}
249 302
250 303 /// Return the union of the two given maps,
251 304 /// calling `merge(key, left_value, right_value)` to resolve keys that exist in
252 305 /// both.
253 306 ///
254 307 /// CC https://github.com/bodil/im-rs/issues/166
255 308 pub(crate) fn ordmap_union_with_merge<K, V>(
256 309 left: OrdMap<K, V>,
257 310 right: OrdMap<K, V>,
258 311 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
259 312 ) -> OrdMap<K, V>
260 313 where
261 314 K: Clone + Ord,
262 315 V: Clone + PartialEq,
263 316 {
264 317 if left.ptr_eq(&right) {
265 318 // One of the two maps is an unmodified clone of the other
266 319 left
267 320 } else if left.len() / 2 > right.len() {
268 321 // When two maps have different sizes,
269 322 // their size difference is a lower bound on
270 323 // how many keys of the larger map are not also in the smaller map.
271 324 // This in turn is a lower bound on the number of differences in
272 325 // `OrdMap::diff` and the "amount of work" that would be done
273 326 // by `ordmap_union_with_merge_by_diff`.
274 327 //
275 328 // Here `left` is more than twice the size of `right`,
276 329 // so the number of differences is more than the total size of
277 330 // `right`. Therefore an algorithm based on iterating `right`
278 331 // is more efficient.
279 332 //
280 333 // This helps a lot when a tiny (or empty) map is merged
281 334 // with a large one.
282 335 ordmap_union_with_merge_by_iter(left, right, merge)
283 336 } else if left.len() < right.len() / 2 {
284 337 // Same as above but with `left` and `right` swapped
285 338 ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
286 339 // Also swapped in `merge` arguments:
287 340 match merge(key, b, a) {
288 341 MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v),
289 342 // … and swap back in `merge` result:
290 343 MergeResult::UseLeftValue => MergeResult::UseRightValue,
291 344 MergeResult::UseRightValue => MergeResult::UseLeftValue,
292 345 }
293 346 })
294 347 } else {
295 348 // For maps of similar size, use the algorithm based on `OrdMap::diff`
296 349 ordmap_union_with_merge_by_diff(left, right, merge)
297 350 }
298 351 }
299 352
300 353 /// Efficient if `right` is much smaller than `left`
301 354 fn ordmap_union_with_merge_by_iter<K, V>(
302 355 mut left: OrdMap<K, V>,
303 356 right: OrdMap<K, V>,
304 357 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
305 358 ) -> OrdMap<K, V>
306 359 where
307 360 K: Clone + Ord,
308 361 V: Clone,
309 362 {
310 363 for (key, right_value) in right {
311 364 match left.get(&key) {
312 365 None => {
313 366 left.insert(key, right_value);
314 367 }
315 368 Some(left_value) => match merge(&key, left_value, &right_value) {
316 369 MergeResult::UseLeftValue => {}
317 370 MergeResult::UseRightValue => {
318 371 left.insert(key, right_value);
319 372 }
320 373 MergeResult::UseNewValue(new_value) => {
321 374 left.insert(key, new_value);
322 375 }
323 376 },
324 377 }
325 378 }
326 379 left
327 380 }
328 381
329 382 /// Fallback when both maps are of similar size
330 383 fn ordmap_union_with_merge_by_diff<K, V>(
331 384 mut left: OrdMap<K, V>,
332 385 mut right: OrdMap<K, V>,
333 386 mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
334 387 ) -> OrdMap<K, V>
335 388 where
336 389 K: Clone + Ord,
337 390 V: Clone + PartialEq,
338 391 {
339 392 // (key, value) pairs that would need to be inserted in either map
340 393 // in order to turn it into the union.
341 394 //
342 395 // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
343 396 // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
344 397 // with `left_updates` only borrowing from `right` and `right_updates` from
345 398 // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`.
346 399 //
347 400 // This would allow moving all `.clone()` calls to after we’ve decided
348 401 // which of `right_updates` or `left_updates` to use
349 402 // (value ones becoming `Cow::into_owned`),
350 403 // and avoid making clones we don’t end up using.
351 404 let mut left_updates = Vec::new();
352 405 let mut right_updates = Vec::new();
353 406
354 407 for difference in left.diff(&right) {
355 408 match difference {
356 409 DiffItem::Add(key, value) => {
357 410 left_updates.push((key.clone(), value.clone()))
358 411 }
359 412 DiffItem::Remove(key, value) => {
360 413 right_updates.push((key.clone(), value.clone()))
361 414 }
362 415 DiffItem::Update {
363 416 old: (key, left_value),
364 417 new: (_, right_value),
365 418 } => match merge(key, left_value, right_value) {
366 419 MergeResult::UseLeftValue => {
367 420 right_updates.push((key.clone(), left_value.clone()))
368 421 }
369 422 MergeResult::UseRightValue => {
370 423 left_updates.push((key.clone(), right_value.clone()))
371 424 }
372 425 MergeResult::UseNewValue(new_value) => {
373 426 left_updates.push((key.clone(), new_value.clone()));
374 427 right_updates.push((key.clone(), new_value))
375 428 }
376 429 },
377 430 }
378 431 }
379 432 if left_updates.len() < right_updates.len() {
380 433 for (key, value) in left_updates {
381 434 left.insert(key, value);
382 435 }
383 436 left
384 437 } else {
385 438 for (key, value) in right_updates {
386 439 right.insert(key, value);
387 440 }
388 441 right
389 442 }
390 443 }
391 444
/// Join items of the iterable with the given separator, similar to Python’s
/// `separator.join(iter)`.
///
/// Formatting the return value consumes the iterator.
/// Formatting it again will produce an empty string.
pub fn join_display(
    iter: impl IntoIterator<Item = impl fmt::Display>,
    separator: impl fmt::Display,
) -> impl fmt::Display {
    let items = iter.into_iter();
    JoinDisplay {
        iter: Cell::new(Some(items)),
        separator,
    }
}

/// The iterator sits in a `Cell` so that `fmt`, which only gets `&self`,
/// can take it out and consume it.
struct JoinDisplay<I, S> {
    iter: Cell<Option<I>>,
    separator: S,
}

impl<I, T, S> fmt::Display for JoinDisplay<I, S>
where
    I: Iterator<Item = T>,
    T: fmt::Display,
    S: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut items = match self.iter.take() {
            Some(items) => items,
            // Already formatted once: nothing left to write
            None => return Ok(()),
        };
        if let Some(head) = items.next() {
            head.fmt(f)?;
        }
        items.try_for_each(|item| {
            self.separator.fmt(f)?;
            item.fmt(f)
        })
    }
}
@@ -1,448 +1,451
1 1 // files.rs
2 2 //
3 3 // Copyright 2019
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 // Yuya Nishihara <yuya@tcha.org>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 //! Functions for fiddling with files.
11 11
12 12 use crate::utils::{
13 13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
14 14 path_auditor::PathAuditor,
15 15 replace_slice,
16 16 };
17 17 use lazy_static::lazy_static;
18 18 use same_file::is_same_file;
19 19 use std::borrow::{Cow, ToOwned};
20 20 use std::ffi::OsStr;
21 21 use std::fs::Metadata;
22 22 use std::iter::FusedIterator;
23 23 use std::ops::Deref;
24 24 use std::path::{Path, PathBuf};
25 25
/// Views the bytes as an `OsStr` without copying them.
/// Only implemented for Unix so far.
pub fn get_os_str_from_bytes(bytes: &[u8]) -> &OsStr {
    #[cfg(unix)]
    let os_str = {
        use std::os::unix::ffi::OsStrExt;
        std::ffi::OsStr::from_bytes(bytes)
    };
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
    // Perhaps, the return type would have to be Result<PathBuf>.
    os_str
}

/// Views the bytes as a `Path` without copying them
pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
    let os_str = get_os_str_from_bytes(bytes);
    Path::new(os_str)
}
39 42
// TODO: need to convert from WTF8 to MBCS bytes on Windows.
// that's why Vec<u8> is returned.
/// Copies the bytes of the path into a new `Vec`
#[cfg(unix)]
pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
    let path: &Path = path.as_ref();
    get_bytes_from_os_str(path)
}

/// Copies the bytes of the OS string into a new `Vec`
#[cfg(unix)]
pub fn get_bytes_from_os_str(str: impl AsRef<OsStr>) -> Vec<u8> {
    use std::os::unix::ffi::OsStrExt;
    let os_str: &OsStr = str.as_ref();
    os_str.as_bytes().to_vec()
}
52 55
53 56 /// An iterator over repository path yielding itself and its ancestors.
54 57 #[derive(Copy, Clone, Debug)]
55 58 pub struct Ancestors<'a> {
56 59 next: Option<&'a HgPath>,
57 60 }
58 61
59 62 impl<'a> Iterator for Ancestors<'a> {
60 63 type Item = &'a HgPath;
61 64
62 65 fn next(&mut self) -> Option<Self::Item> {
63 66 let next = self.next;
64 67 self.next = match self.next {
65 68 Some(s) if s.is_empty() => None,
66 69 Some(s) => {
67 70 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
68 71 Some(HgPath::new(&s.as_bytes()[..p]))
69 72 }
70 73 None => None,
71 74 };
72 75 next
73 76 }
74 77 }
75 78
76 79 impl<'a> FusedIterator for Ancestors<'a> {}
77 80
78 81 /// An iterator over repository path yielding itself and its ancestors.
79 82 #[derive(Copy, Clone, Debug)]
80 83 pub(crate) struct AncestorsWithBase<'a> {
81 84 next: Option<(&'a HgPath, &'a HgPath)>,
82 85 }
83 86
84 87 impl<'a> Iterator for AncestorsWithBase<'a> {
85 88 type Item = (&'a HgPath, &'a HgPath);
86 89
87 90 fn next(&mut self) -> Option<Self::Item> {
88 91 let next = self.next;
89 92 self.next = match self.next {
90 93 Some((s, _)) if s.is_empty() => None,
91 94 Some((s, _)) => Some(s.split_filename()),
92 95 None => None,
93 96 };
94 97 next
95 98 }
96 99 }
97 100
98 101 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
99 102
100 103 /// Returns an iterator yielding ancestor directories of the given repository
101 104 /// path.
102 105 ///
103 106 /// The path is separated by '/', and must not start with '/'.
104 107 ///
105 108 /// The path itself isn't included unless it is b"" (meaning the root
106 109 /// directory.)
107 110 pub fn find_dirs(path: &HgPath) -> Ancestors {
108 111 let mut dirs = Ancestors { next: Some(path) };
109 112 if !path.is_empty() {
110 113 dirs.next(); // skip itself
111 114 }
112 115 dirs
113 116 }
114 117
115 118 /// Returns an iterator yielding ancestor directories of the given repository
116 119 /// path.
117 120 ///
118 121 /// The path is separated by '/', and must not start with '/'.
119 122 ///
120 123 /// The path itself isn't included unless it is b"" (meaning the root
121 124 /// directory.)
122 125 pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase {
123 126 let mut dirs = AncestorsWithBase {
124 127 next: Some((path, HgPath::new(b""))),
125 128 };
126 129 if !path.is_empty() {
127 130 dirs.next(); // skip itself
128 131 }
129 132 dirs
130 133 }
131 134
/// Returns the path with ASCII letters folded to a single case:
/// upper case on Windows, lower case on Unix.
///
/// TODO more than ASCII?
pub fn normalize_case(path: &HgPath) -> HgPathBuf {
    #[cfg(windows)] // NTFS compares via upper()
    return path.to_ascii_uppercase();
    #[cfg(unix)]
    path.to_ascii_lowercase()
}
139 142
lazy_static! {
    /// UTF-8 encodings of the code points that `hfs_ignore_clean` looks for.
    /// Every listed code point encodes to exactly three bytes, hence the
    /// size of the encoding buffer.
    static ref IGNORED_CHARS: Vec<Vec<u8>> = {
        [
            0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
            0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
        ]
        .iter()
        .map(|code| {
            std::char::from_u32(*code)
                .unwrap()
                .encode_utf8(&mut [0; 3])
                .bytes()
                .collect()
        })
        .collect()
    };
}
157 160
158 161 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
159 162 let mut buf = bytes.to_owned();
160 163 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
161 164 if needs_escaping {
162 165 for forbidden in IGNORED_CHARS.iter() {
163 166 replace_slice(&mut buf, forbidden, &[])
164 167 }
165 168 buf
166 169 } else {
167 170 buf
168 171 }
169 172 }
170 173
171 174 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
172 175 hfs_ignore_clean(&bytes.to_ascii_lowercase())
173 176 }
174 177
/// The subset of `stat` fields that is copied out of a `Metadata`
#[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
pub struct HgMetadata {
    pub st_dev: u64,
    pub st_mode: u32,
    pub st_nlink: u64,
    pub st_size: u64,
    pub st_mtime: i64,
    pub st_ctime: i64,
}

// TODO support other plaforms
#[cfg(unix)]
impl HgMetadata {
    /// Copies the fields out of the given file metadata
    pub fn from_metadata(metadata: Metadata) -> Self {
        use std::os::unix::fs::MetadataExt;
        let stat = &metadata;
        HgMetadata {
            st_dev: MetadataExt::dev(stat),
            st_mode: MetadataExt::mode(stat),
            st_nlink: MetadataExt::nlink(stat),
            st_size: MetadataExt::size(stat),
            st_mtime: MetadataExt::mtime(stat),
            st_ctime: MetadataExt::ctime(stat),
        }
    }
}
200 203
/// Returns the canonical path of `name`, given `cwd` and `root`
///
/// A relative `name` is first resolved against `root` joined with `cwd`.
/// The result is the path relative to `root`, or the empty path when `name`
/// designates `root` itself.
///
/// # Errors
///
/// Returns an error if the relative path cannot be converted to an
/// `HgPathBuf`, if the path auditor rejects it, or
/// `HgPathError::NotUnderRoot` if neither `name` nor any of its parents is
/// `root`.
pub fn canonical_path(
    root: impl AsRef<Path>,
    cwd: impl AsRef<Path>,
    name: impl AsRef<Path>,
) -> Result<PathBuf, HgPathError> {
    // TODO add missing normalization for other platforms
    let root = root.as_ref();
    let cwd = cwd.as_ref();
    let name = name.as_ref();

    let name = if !name.is_absolute() {
        root.join(&cwd).join(&name)
    } else {
        name.to_owned()
    };
    let auditor = PathAuditor::new(&root);
    if name != root && name.starts_with(&root) {
        // Fast path: `root` is a textual prefix of `name`.
        // The `unwrap` cannot fail since `starts_with` was just checked.
        let name = name.strip_prefix(&root).unwrap();
        auditor.audit_path(path_to_hg_path_buf(name)?)?;
        Ok(name.to_owned())
    } else if name == root {
        Ok("".into())
    } else {
        // Determine whether `name' is in the hierarchy at or beneath `root',
        // by iterating name=name.parent() until it returns `None` (can't
        // check name == '/', because that doesn't work on windows).
        let mut name = name.deref();
        let original_name = name.to_owned();
        loop {
            // A failure to compare the two files counts as "not the same"
            let same = is_same_file(&name, &root).unwrap_or(false);
            if same {
                if name == original_name {
                    // `name` was actually the same as root (maybe a symlink)
                    return Ok("".into());
                }
                // `name` is a symlink to root, so `original_name` is under
                // root
                let rel_path = original_name.strip_prefix(&name).unwrap();
                auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
                return Ok(rel_path.to_owned());
            }
            name = match name.parent() {
                None => break,
                Some(p) => p,
            };
        }
        // TODO hint to the user about using --cwd
        // Bubble up the responsibility to Python for now
        Err(HgPathError::NotUnderRoot {
            path: original_name.to_owned(),
            root: root.to_owned(),
        })
    }
}
256 259
257 260 /// Returns the representation of the path relative to the current working
258 261 /// directory for display purposes.
259 262 ///
260 263 /// `cwd` is a `HgPath`, so it is considered relative to the root directory
261 264 /// of the repository.
262 265 ///
263 266 /// # Examples
264 267 ///
265 268 /// ```
266 269 /// use hg::utils::hg_path::HgPath;
267 270 /// use hg::utils::files::relativize_path;
268 271 /// use std::borrow::Cow;
269 272 ///
270 273 /// let file = HgPath::new(b"nested/file");
271 274 /// let cwd = HgPath::new(b"");
272 275 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"nested/file"));
273 276 ///
274 277 /// let cwd = HgPath::new(b"nested");
275 278 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"file"));
276 279 ///
277 280 /// let cwd = HgPath::new(b"other");
278 281 /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"../nested/file"));
279 282 /// ```
280 283 pub fn relativize_path(path: &HgPath, cwd: impl AsRef<HgPath>) -> Cow<[u8]> {
281 284 if cwd.as_ref().is_empty() {
282 285 Cow::Borrowed(path.as_bytes())
283 286 } else {
284 287 let mut res: Vec<u8> = Vec::new();
285 288 let mut path_iter = path.as_bytes().split(|b| *b == b'/').peekable();
286 289 let mut cwd_iter =
287 290 cwd.as_ref().as_bytes().split(|b| *b == b'/').peekable();
288 291 loop {
289 292 match (path_iter.peek(), cwd_iter.peek()) {
290 293 (Some(a), Some(b)) if a == b => (),
291 294 _ => break,
292 295 }
293 296 path_iter.next();
294 297 cwd_iter.next();
295 298 }
296 299 let mut need_sep = false;
297 300 for _ in cwd_iter {
298 301 if need_sep {
299 302 res.extend(b"/")
300 303 } else {
301 304 need_sep = true
302 305 };
303 306 res.extend(b"..");
304 307 }
305 308 for c in path_iter {
306 309 if need_sep {
307 310 res.extend(b"/")
308 311 } else {
309 312 need_sep = true
310 313 };
311 314 res.extend(c);
312 315 }
313 316 Cow::Owned(res)
314 317 }
315 318 }
316 319
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// `find_dirs` yields every ancestor from the deepest one up to the
    /// empty path, then keeps returning `None`.
    #[test]
    fn find_dirs_some() {
        let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
        assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
        assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
        assert_eq!(dirs.next(), Some(HgPath::new(b"")));
        assert_eq!(dirs.next(), None);
        assert_eq!(dirs.next(), None);
    }

    #[test]
    fn find_dirs_empty() {
        // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
        let mut dirs = super::find_dirs(HgPath::new(b""));
        assert_eq!(dirs.next(), Some(HgPath::new(b"")));
        assert_eq!(dirs.next(), None);
        assert_eq!(dirs.next(), None);
    }

    /// `find_dirs_with_base` yields `(directory, base name)` pairs from the
    /// deepest directory up to the empty path.
    #[test]
    fn test_find_dirs_with_base_some() {
        let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
        assert_eq!(
            dirs.next(),
            Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
        );
        assert_eq!(
            dirs.next(),
            Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
        );
        assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
        assert_eq!(dirs.next(), None);
        assert_eq!(dirs.next(), None);
    }

    #[test]
    fn test_find_dirs_with_base_empty() {
        let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
        assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
        assert_eq!(dirs.next(), None);
        assert_eq!(dirs.next(), None);
    }

    /// Purely textual cases: none of these paths needs to exist on disk.
    #[test]
    fn test_canonical_path() {
        let root = Path::new("/repo");
        let cwd = Path::new("/dir");
        let name = Path::new("filename");
        assert_eq!(
            canonical_path(root, cwd, name),
            Err(HgPathError::NotUnderRoot {
                path: PathBuf::from("/dir/filename"),
                root: root.to_path_buf()
            })
        );

        let root = Path::new("/repo");
        let cwd = Path::new("/");
        let name = Path::new("filename");
        assert_eq!(
            canonical_path(root, cwd, name),
            Err(HgPathError::NotUnderRoot {
                path: PathBuf::from("/filename"),
                root: root.to_path_buf()
            })
        );

        let root = Path::new("/repo");
        let cwd = Path::new("/");
        let name = Path::new("repo/filename");
        assert_eq!(
            canonical_path(root, cwd, name),
            Ok(PathBuf::from("filename"))
        );

        let root = Path::new("/repo");
        let cwd = Path::new("/repo");
        let name = Path::new("filename");
        assert_eq!(
            canonical_path(root, cwd, name),
            Ok(PathBuf::from("filename"))
        );

        let root = Path::new("/repo");
        let cwd = Path::new("/repo/subdir");
        let name = Path::new("filename");
        assert_eq!(
            canonical_path(root, cwd, name),
            Ok(PathBuf::from("subdir/filename"))
        );
    }

    /// Cases that only resolve through the filesystem: `c` is a symlink to
    /// the repository root `a/b`.
    ///
    /// Gated on unix because `std::os::unix::fs::symlink` does not exist on
    /// other platforms; without the gate the whole test module fails to
    /// compile there.
    #[cfg(unix)]
    #[test]
    fn test_canonical_path_not_rooted() {
        use std::fs::create_dir;
        use tempfile::tempdir;

        let base_dir = tempdir().unwrap();
        let base_dir_path = base_dir.path();
        let beneath_repo = base_dir_path.join("a");
        let root = base_dir_path.join("a/b");
        let out_of_repo = base_dir_path.join("c");
        let under_repo_symlink = out_of_repo.join("d");

        create_dir(&beneath_repo).unwrap();
        create_dir(&root).unwrap();

        // TODO make portable
        std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();

        // The symlink itself designates the root.
        assert_eq!(
            canonical_path(&root, Path::new(""), out_of_repo),
            Ok(PathBuf::from(""))
        );
        // A parent of the root is not under the root.
        assert_eq!(
            canonical_path(&root, Path::new(""), &beneath_repo),
            Err(HgPathError::NotUnderRoot {
                path: beneath_repo.to_owned(),
                root: root.to_owned()
            })
        );
        // A path below the symlink is reported relative to the root.
        assert_eq!(
            canonical_path(&root, Path::new(""), &under_repo_symlink),
            Ok(PathBuf::from("d"))
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now