Show More
@@ -1,310 +1,311 | |||
|
1 | 1 | // layer.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2020 |
|
4 | 4 | // Valentin Gatien-Baron, |
|
5 | 5 | // Raphaël Gomès <rgomes@octobus.net> |
|
6 | 6 | // |
|
7 | 7 | // This software may be used and distributed according to the terms of the |
|
8 | 8 | // GNU General Public License version 2 or any later version. |
|
9 | 9 | |
|
10 | 10 | use crate::errors::HgError; |
|
11 | 11 | use crate::utils::files::{get_bytes_from_path, get_path_from_bytes}; |
|
12 | 12 | use format_bytes::{format_bytes, write_bytes, DisplayBytes}; |
|
13 | 13 | use lazy_static::lazy_static; |
|
14 | 14 | use regex::bytes::Regex; |
|
15 | 15 | use std::collections::HashMap; |
|
16 | 16 | use std::path::{Path, PathBuf}; |
|
17 | 17 | |
|
18 | 18 | lazy_static! { |
|
19 | 19 | static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]"); |
|
20 | 20 | static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)"); |
|
21 | 21 | /// Continuation whitespace |
|
22 | 22 | static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$"); |
|
23 | 23 | static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)"); |
|
24 | 24 | static ref COMMENT_RE: Regex = make_regex(r"^(;|#)"); |
|
25 | 25 | /// A directive that allows for removing previous entries |
|
26 | 26 | static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)"); |
|
27 | 27 | /// A directive that allows for including other config files |
|
28 | 28 | static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$"); |
|
29 | 29 | } |
|
30 | 30 | |
|
31 | 31 | /// All config values separated by layers of precedence. |
|
32 | 32 | /// Each config source may be split in multiple layers if `%include` directives |
|
33 | 33 | /// are used. |
|
34 | 34 | /// TODO detail the general precedence |
|
35 | 35 | #[derive(Clone)] |
|
36 | 36 | pub struct ConfigLayer { |
|
37 | 37 | /// Mapping of the sections to their items |
|
38 | 38 | sections: HashMap<Vec<u8>, ConfigItem>, |
|
39 | 39 | /// All sections (and their items/values) in a layer share the same origin |
|
40 | 40 | pub origin: ConfigOrigin, |
|
41 | 41 | /// Whether this layer comes from a trusted user or group |
|
42 | 42 | pub trusted: bool, |
|
43 | 43 | } |
|
44 | 44 | |
|
45 | 45 | impl ConfigLayer { |
|
46 | 46 | pub fn new(origin: ConfigOrigin) -> Self { |
|
47 | 47 | ConfigLayer { |
|
48 | 48 | sections: HashMap::new(), |
|
49 | 49 | trusted: true, // TODO check |
|
50 | 50 | origin, |
|
51 | 51 | } |
|
52 | 52 | } |
|
53 | 53 | |
|
54 | 54 | /// Parse `--config` CLI arguments and return a layer if thereβs any |
|
55 | 55 | pub(crate) fn parse_cli_args( |
|
56 | 56 | cli_config_args: impl IntoIterator<Item = impl AsRef<[u8]>>, |
|
57 | 57 | ) -> Result<Option<Self>, ConfigError> { |
|
58 | 58 | fn parse_one(arg: &[u8]) -> Option<(Vec<u8>, Vec<u8>, Vec<u8>)> { |
|
59 | 59 | use crate::utils::SliceExt; |
|
60 | 60 | |
|
61 | 61 | let (section_and_item, value) = arg.split_2(b'=')?; |
|
62 | 62 | let (section, item) = section_and_item.trim().split_2(b'.')?; |
|
63 | 63 | Some(( |
|
64 | 64 | section.to_owned(), |
|
65 | 65 | item.to_owned(), |
|
66 | 66 | value.trim().to_owned(), |
|
67 | 67 | )) |
|
68 | 68 | } |
|
69 | 69 | |
|
70 | 70 | let mut layer = Self::new(ConfigOrigin::CommandLine); |
|
71 | 71 | for arg in cli_config_args { |
|
72 | 72 | let arg = arg.as_ref(); |
|
73 | 73 | if let Some((section, item, value)) = parse_one(arg) { |
|
74 | 74 | layer.add(section, item, value, None); |
|
75 | 75 | } else { |
|
76 | 76 | Err(HgError::abort(format!( |
|
77 | 77 | "abort: malformed --config option: '{}' \ |
|
78 | 78 | (use --config section.name=value)", |
|
79 | 79 | String::from_utf8_lossy(arg), |
|
80 | 80 | )))? |
|
81 | 81 | } |
|
82 | 82 | } |
|
83 | 83 | if layer.sections.is_empty() { |
|
84 | 84 | Ok(None) |
|
85 | 85 | } else { |
|
86 | 86 | Ok(Some(layer)) |
|
87 | 87 | } |
|
88 | 88 | } |
|
89 | 89 | |
|
90 | 90 | /// Returns whether this layer comes from `--config` CLI arguments |
|
91 | 91 | pub(crate) fn is_from_command_line(&self) -> bool { |
|
92 | 92 | if let ConfigOrigin::CommandLine = self.origin { |
|
93 | 93 | true |
|
94 | 94 | } else { |
|
95 | 95 | false |
|
96 | 96 | } |
|
97 | 97 | } |
|
98 | 98 | |
|
99 | 99 | /// Add an entry to the config, overwriting the old one if already present. |
|
100 | 100 | pub fn add( |
|
101 | 101 | &mut self, |
|
102 | 102 | section: Vec<u8>, |
|
103 | 103 | item: Vec<u8>, |
|
104 | 104 | value: Vec<u8>, |
|
105 | 105 | line: Option<usize>, |
|
106 | 106 | ) { |
|
107 | 107 | self.sections |
|
108 | 108 | .entry(section) |
|
109 | 109 | .or_insert_with(|| HashMap::new()) |
|
110 | 110 | .insert(item, ConfigValue { bytes: value, line }); |
|
111 | 111 | } |
|
112 | 112 | |
|
113 | 113 | /// Returns the config value in `<section>.<item>` if it exists |
|
114 | 114 | pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> { |
|
115 | 115 | Some(self.sections.get(section)?.get(item)?) |
|
116 | 116 | } |
|
117 | 117 | |
|
118 | 118 | /// Returns the keys defined in the given section |
|
119 | 119 | pub fn iter_keys(&self, section: &[u8]) -> impl Iterator<Item = &[u8]> { |
|
120 | 120 | self.sections |
|
121 | 121 | .get(section) |
|
122 | 122 | .into_iter() |
|
123 | 123 | .flat_map(|section| section.keys().map(|vec| &**vec)) |
|
124 | 124 | } |
|
125 | 125 | |
|
126 | 126 | pub fn is_empty(&self) -> bool { |
|
127 | 127 | self.sections.is_empty() |
|
128 | 128 | } |
|
129 | 129 | |
|
130 | 130 | /// Returns a `Vec` of layers in order of precedence (so, in read order), |
|
131 | 131 | /// recursively parsing the `%include` directives if any. |
|
132 | 132 | pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> { |
|
133 | 133 | let mut layers = vec![]; |
|
134 | 134 | |
|
135 | 135 | // Discard byte order mark if any |
|
136 | 136 | let data = if data.starts_with(b"\xef\xbb\xbf") { |
|
137 | 137 | &data[3..] |
|
138 | 138 | } else { |
|
139 | 139 | data |
|
140 | 140 | }; |
|
141 | 141 | |
|
142 | 142 | // TODO check if it's trusted |
|
143 | 143 | let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned())); |
|
144 | 144 | |
|
145 | 145 | let mut lines_iter = |
|
146 | 146 | data.split(|b| *b == b'\n').enumerate().peekable(); |
|
147 | 147 | let mut section = b"".to_vec(); |
|
148 | 148 | |
|
149 | 149 | while let Some((index, bytes)) = lines_iter.next() { |
|
150 | 150 | let line = Some(index + 1); |
|
151 | 151 | if let Some(m) = INCLUDE_RE.captures(&bytes) { |
|
152 | 152 | let filename_bytes = &m[1]; |
|
153 | let filename_bytes = crate::utils::expand_vars(filename_bytes); | |
|
153 | 154 | // `Path::parent` only fails for the root directory, |
|
154 | 155 | // which `src` canβt be since weβve managed to open it as a |
|
155 | 156 | // file. |
|
156 | 157 | let dir = src |
|
157 | 158 | .parent() |
|
158 | 159 | .expect("Path::parent fail on a file weβve read"); |
|
159 | 160 | // `Path::join` with an absolute argument correctly ignores the |
|
160 | 161 | // base path |
|
161 | 162 | let filename = dir.join(&get_path_from_bytes(&filename_bytes)); |
|
162 | 163 | let data = std::fs::read(&filename).map_err(|io_error| { |
|
163 | 164 | ConfigParseError { |
|
164 | 165 | origin: ConfigOrigin::File(src.to_owned()), |
|
165 | 166 | line, |
|
166 | 167 | message: format_bytes!( |
|
167 | 168 | b"cannot include {} ({})", |
|
168 | 169 | filename_bytes, |
|
169 | 170 | format_bytes::Utf8(io_error) |
|
170 | 171 | ), |
|
171 | 172 | } |
|
172 | 173 | })?; |
|
173 | 174 | layers.push(current_layer); |
|
174 | 175 | layers.extend(Self::parse(&filename, &data)?); |
|
175 | 176 | current_layer = Self::new(ConfigOrigin::File(src.to_owned())); |
|
176 | 177 | } else if let Some(_) = EMPTY_RE.captures(&bytes) { |
|
177 | 178 | } else if let Some(m) = SECTION_RE.captures(&bytes) { |
|
178 | 179 | section = m[1].to_vec(); |
|
179 | 180 | } else if let Some(m) = ITEM_RE.captures(&bytes) { |
|
180 | 181 | let item = m[1].to_vec(); |
|
181 | 182 | let mut value = m[2].to_vec(); |
|
182 | 183 | loop { |
|
183 | 184 | match lines_iter.peek() { |
|
184 | 185 | None => break, |
|
185 | 186 | Some((_, v)) => { |
|
186 | 187 | if let Some(_) = COMMENT_RE.captures(&v) { |
|
187 | 188 | } else if let Some(_) = CONT_RE.captures(&v) { |
|
188 | 189 | value.extend(b"\n"); |
|
189 | 190 | value.extend(&m[1]); |
|
190 | 191 | } else { |
|
191 | 192 | break; |
|
192 | 193 | } |
|
193 | 194 | } |
|
194 | 195 | }; |
|
195 | 196 | lines_iter.next(); |
|
196 | 197 | } |
|
197 | 198 | current_layer.add(section.clone(), item, value, line); |
|
198 | 199 | } else if let Some(m) = UNSET_RE.captures(&bytes) { |
|
199 | 200 | if let Some(map) = current_layer.sections.get_mut(§ion) { |
|
200 | 201 | map.remove(&m[1]); |
|
201 | 202 | } |
|
202 | 203 | } else { |
|
203 | 204 | let message = if bytes.starts_with(b" ") { |
|
204 | 205 | format_bytes!(b"unexpected leading whitespace: {}", bytes) |
|
205 | 206 | } else { |
|
206 | 207 | bytes.to_owned() |
|
207 | 208 | }; |
|
208 | 209 | return Err(ConfigParseError { |
|
209 | 210 | origin: ConfigOrigin::File(src.to_owned()), |
|
210 | 211 | line, |
|
211 | 212 | message, |
|
212 | 213 | } |
|
213 | 214 | .into()); |
|
214 | 215 | } |
|
215 | 216 | } |
|
216 | 217 | if !current_layer.is_empty() { |
|
217 | 218 | layers.push(current_layer); |
|
218 | 219 | } |
|
219 | 220 | Ok(layers) |
|
220 | 221 | } |
|
221 | 222 | } |
|
222 | 223 | |
|
223 | 224 | impl DisplayBytes for ConfigLayer { |
|
224 | 225 | fn display_bytes( |
|
225 | 226 | &self, |
|
226 | 227 | out: &mut dyn std::io::Write, |
|
227 | 228 | ) -> std::io::Result<()> { |
|
228 | 229 | let mut sections: Vec<_> = self.sections.iter().collect(); |
|
229 | 230 | sections.sort_by(|e0, e1| e0.0.cmp(e1.0)); |
|
230 | 231 | |
|
231 | 232 | for (section, items) in sections.into_iter() { |
|
232 | 233 | let mut items: Vec<_> = items.into_iter().collect(); |
|
233 | 234 | items.sort_by(|e0, e1| e0.0.cmp(e1.0)); |
|
234 | 235 | |
|
235 | 236 | for (item, config_entry) in items { |
|
236 | 237 | write_bytes!( |
|
237 | 238 | out, |
|
238 | 239 | b"{}.{}={} # {}\n", |
|
239 | 240 | section, |
|
240 | 241 | item, |
|
241 | 242 | &config_entry.bytes, |
|
242 | 243 | &self.origin, |
|
243 | 244 | )? |
|
244 | 245 | } |
|
245 | 246 | } |
|
246 | 247 | Ok(()) |
|
247 | 248 | } |
|
248 | 249 | } |
|
249 | 250 | |
|
250 | 251 | /// Mapping of section item to value. |
|
251 | 252 | /// In the following: |
|
252 | 253 | /// ```text |
|
253 | 254 | /// [ui] |
|
254 | 255 | /// paginate=no |
|
255 | 256 | /// ``` |
|
256 | 257 | /// "paginate" is the section item and "no" the value. |
|
257 | 258 | pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>; |
|
258 | 259 | |
|
/// A single configuration value together with where it was found.
#[derive(Clone, Debug, PartialEq)]
pub struct ConfigValue {
    /// Raw, undecoded value, whether it came from the CLI, the environment
    /// or a file
    pub bytes: Vec<u8>,
    /// 1-indexed line number; only present if the value comes from a file.
    pub line: Option<usize>,
}
|
266 | 267 | |
|
/// Where a configuration layer was read from.
#[derive(Clone, Debug)]
pub enum ConfigOrigin {
    /// A configuration file, identified by its path
    File(PathBuf),
    /// A `--config` CLI argument
    CommandLine,
    /// An environment variable like `$PAGER` or `$EDITOR`; holds the
    /// variable name
    Environment(Vec<u8>),
    /* TODO cli
     * TODO defaults (configitems.py)
     * TODO extensions
     * TODO Python resources?
     * Others? */
}
|
281 | 282 | |
|
282 | 283 | impl DisplayBytes for ConfigOrigin { |
|
283 | 284 | fn display_bytes( |
|
284 | 285 | &self, |
|
285 | 286 | out: &mut dyn std::io::Write, |
|
286 | 287 | ) -> std::io::Result<()> { |
|
287 | 288 | match self { |
|
288 | 289 | ConfigOrigin::File(p) => out.write_all(&get_bytes_from_path(p)), |
|
289 | 290 | ConfigOrigin::CommandLine => out.write_all(b"--config"), |
|
290 | 291 | ConfigOrigin::Environment(e) => write_bytes!(out, b"${}", e), |
|
291 | 292 | } |
|
292 | 293 | } |
|
293 | 294 | } |
|
294 | 295 | |
|
295 | 296 | #[derive(Debug)] |
|
296 | 297 | pub struct ConfigParseError { |
|
297 | 298 | pub origin: ConfigOrigin, |
|
298 | 299 | pub line: Option<usize>, |
|
299 | 300 | pub message: Vec<u8>, |
|
300 | 301 | } |
|
301 | 302 | |
|
302 | 303 | #[derive(Debug, derive_more::From)] |
|
303 | 304 | pub enum ConfigError { |
|
304 | 305 | Parse(ConfigParseError), |
|
305 | 306 | Other(HgError), |
|
306 | 307 | } |
|
307 | 308 | |
|
308 | 309 | fn make_regex(pattern: &'static str) -> Regex { |
|
309 | 310 | Regex::new(pattern).expect("expected a valid regex") |
|
310 | 311 | } |
@@ -1,430 +1,483 | |||
|
1 | 1 | // utils module |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> |
|
4 | 4 | // |
|
5 | 5 | // This software may be used and distributed according to the terms of the |
|
6 | 6 | // GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | //! Contains useful functions, traits, structs, etc. for use in core. |
|
9 | 9 | |
|
10 | 10 | use crate::errors::{HgError, IoErrorContext}; |
|
11 | 11 | use crate::utils::hg_path::HgPath; |
|
12 | 12 | use im_rc::ordmap::DiffItem; |
|
13 | 13 | use im_rc::ordmap::OrdMap; |
|
14 | 14 | use std::cell::Cell; |
|
15 | 15 | use std::fmt; |
|
16 | 16 | use std::{io::Write, ops::Deref}; |
|
17 | 17 | |
|
18 | 18 | pub mod files; |
|
19 | 19 | pub mod hg_path; |
|
20 | 20 | pub mod path_auditor; |
|
21 | 21 | |
|
/// Returns the index of the first occurrence of `needle` in `slice`, if any.
///
/// An empty `needle` is found at index 0, like an empty pattern in a string
/// search.
///
/// Useful until rust/issues/56345 is stable
///
/// # Examples
///
/// ```
/// use crate::hg::utils::find_slice_in_slice;
///
/// let haystack = b"This is the haystack".to_vec();
/// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
/// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
/// assert_eq!(find_slice_in_slice(&haystack, b""), Some(0));
/// ```
pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    if needle.is_empty() {
        // `slice::windows` panics when asked for windows of size 0
        return Some(0);
    }
    slice
        .windows(needle.len())
        .position(|window| window == needle)
}
|
41 | 41 | |
|
/// Overwrites, in place, occurrences of `from` inside `buf` with `to`.
///
/// Nothing happens unless `from` and `to` have the same length and `buf` is
/// at least that long. Every start position is examined from left to right
/// against the current (already partially rewritten) contents of `buf`.
///
/// # Examples
///
/// ```
/// use crate::hg::utils::replace_slice;
/// let mut line = b"I hate writing tests!".to_vec();
/// replace_slice(&mut line, b"hate", b"love");
/// assert_eq!(
///     line,
///     b"I love writing tests!".to_vec()
/// );
/// ```
pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
where
    T: Clone + PartialEq,
{
    let width = from.len();
    if width != to.len() || buf.len() < width {
        return;
    }
    let last_start = buf.len() - width;
    let mut start = 0;
    while start <= last_start {
        let end = start + width;
        if buf[start..end] == *from {
            buf[start..end].clone_from_slice(to);
        }
        start += 1;
    }
}
|
68 | 68 | |
|
/// Convenience methods for byte strings.
pub trait SliceExt {
    /// Removes every trailing `\n` byte.
    fn trim_end_newlines(&self) -> &Self;
    /// Removes trailing ASCII whitespace.
    fn trim_end(&self) -> &Self;
    /// Removes leading ASCII whitespace.
    fn trim_start(&self) -> &Self;
    /// Removes leading and trailing ASCII whitespace.
    fn trim(&self) -> &Self;
    /// Returns what follows `needle` if `self` starts with it.
    fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
    /// Splits at the first `separator`, which is not part of either half.
    /// Returns `None` if there is no `separator`.
    fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])>;
}

/// Returns `false` for the ASCII whitespace bytes (space, `\t`, `\n`,
/// vertical tab, form feed, `\r`) and `true` for everything else.
///
/// Bytes >= 0x80 are never whitespace here: casting them to `char` would
/// classify 0x85 and 0xA0 as whitespace (U+0085 and U+00A0), and trimming
/// those would cut UTF-8 sequences such as "à" (0xC3 0xA0) in half.
// Taking `&u8` lets this be used directly as a predicate for
// `Iterator::position` and `rposition` on `slice::Iter<u8>`.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_not_whitespace(c: &u8) -> bool {
    match *c {
        b' ' | b'\t' | b'\n' | 0x0B | 0x0C | b'\r' => false,
        _ => true,
    }
}

impl SliceExt for [u8] {
    fn trim_end_newlines(&self) -> &[u8] {
        if let Some(last) = self.iter().rposition(|&byte| byte != b'\n') {
            &self[..=last]
        } else {
            &[]
        }
    }
    fn trim_end(&self) -> &[u8] {
        if let Some(last) = self.iter().rposition(is_not_whitespace) {
            &self[..=last]
        } else {
            &[]
        }
    }
    fn trim_start(&self) -> &[u8] {
        if let Some(first) = self.iter().position(is_not_whitespace) {
            &self[first..]
        } else {
            &[]
        }
    }

    /// ```
    /// use hg::utils::SliceExt;
    /// assert_eq!(
    ///     b"  to trim  ".trim(),
    ///     b"to trim"
    /// );
    /// assert_eq!(
    ///     b"to trim  ".trim(),
    ///     b"to trim"
    /// );
    /// assert_eq!(
    ///     b"  to trim".trim(),
    ///     b"to trim"
    /// );
    /// ```
    fn trim(&self) -> &[u8] {
        self.trim_start().trim_end()
    }

    fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
        if self.starts_with(needle) {
            Some(&self[needle.len()..])
        } else {
            None
        }
    }

    fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
        let mut iter = self.splitn(2, |&byte| byte == separator);
        let a = iter.next()?;
        let b = iter.next()?;
        Some((a, b))
    }
}
|
140 | 140 | |
|
pub trait Escaped {
    /// Return bytes escaped for display to the user
    fn escaped_bytes(&self) -> Vec<u8>;
}

impl Escaped for u8 {
    /// Escapes a single byte:
    /// - `'` and `\` are prefixed with a backslash,
    /// - tab, newline and carriage return become `\t`, `\n` and `\r`,
    /// - other bytes below 0x20 or from 0x7F up become `\xNN` with two
    ///   lowercase hexadecimal digits,
    /// - anything else is kept as is.
    fn escaped_bytes(&self) -> Vec<u8> {
        let mut acc = vec![];
        match self {
            c @ b'\'' | c @ b'\\' => {
                acc.push(b'\\');
                acc.push(*c);
            }
            // Raw byte strings: these are a single backslash and a letter
            b'\t' => {
                acc.extend(br"\t");
            }
            b'\n' => {
                acc.extend(br"\n");
            }
            b'\r' => {
                acc.extend(br"\r");
            }
            c if (*c < b' ' || *c >= 127) => {
                // Always two digits so that the escape is unambiguous.
                // Writing to a `Vec` cannot fail.
                write!(acc, "\\x{:02x}", self).unwrap();
            }
            c => {
                acc.push(*c);
            }
        }
        acc
    }
}

impl<'a, T: Escaped> Escaped for &'a [T] {
    /// Concatenates the escaped form of every element.
    fn escaped_bytes(&self) -> Vec<u8> {
        self.iter().flat_map(Escaped::escaped_bytes).collect()
    }
}

impl<T: Escaped> Escaped for Vec<T> {
    /// Same as escaping the vector’s contents as a slice.
    fn escaped_bytes(&self) -> Vec<u8> {
        self.deref().escaped_bytes()
    }
}
|
185 | 185 | |
|
186 | 186 | impl<'a> Escaped for &'a HgPath { |
|
187 | 187 | fn escaped_bytes(&self) -> Vec<u8> { |
|
188 | 188 | self.as_bytes().escaped_bytes() |
|
189 | 189 | } |
|
190 | 190 | } |
|
191 | 191 | |
|
/// Returns `s` without its trailing `suffix`, or `None` if `s` does not end
/// with `suffix`.
// TODO: use the str method when we require Rust 1.45
pub(crate) fn strip_suffix<'a>(s: &'a str, suffix: &str) -> Option<&'a str> {
    if !s.ends_with(suffix) {
        return None;
    }
    let kept = s.len() - suffix.len();
    Some(&s[..kept])
}
|
200 | 200 | |
|
/// Quotes `value` so that a POSIX shell reads it back as a single word.
///
/// A non-empty value made only of ASCII letters, digits and `._/+-` is
/// returned unchanged. Anything else is wrapped in single quotes, with each
/// embedded single quote written as `'\''` (close the quotes, an escaped
/// quote, reopen the quotes): a backslash has no special meaning inside
/// single quotes, so `\'` cannot be used there. The empty value becomes
/// `''` so that it is still passed as an argument.
#[cfg(unix)]
pub fn shell_quote(value: &[u8]) -> Vec<u8> {
    // TODO: Use the `matches!` macro when we require Rust 1.42+
    let is_safe = |byte: &u8| match *byte {
        b'a'..=b'z'
        | b'A'..=b'Z'
        | b'0'..=b'9'
        | b'.'
        | b'_'
        | b'/'
        | b'+'
        | b'-' => true,
        _ => false,
    };
    if !value.is_empty() && value.iter().all(is_safe) {
        return value.to_owned();
    }

    let mut quoted = Vec::with_capacity(value.len() + 2);
    quoted.push(b'\'');
    for &byte in value {
        if byte == b'\'' {
            quoted.extend_from_slice(br"'\''");
        } else {
            quoted.push(byte);
        }
    }
    quoted.push(b'\'');
    quoted
}
|
229 | 229 | |
|
230 | 230 | pub fn current_dir() -> Result<std::path::PathBuf, HgError> { |
|
231 | 231 | std::env::current_dir().map_err(|error| HgError::IoError { |
|
232 | 232 | error, |
|
233 | 233 | context: IoErrorContext::CurrentDir, |
|
234 | 234 | }) |
|
235 | 235 | } |
|
236 | 236 | |
|
237 | 237 | pub fn current_exe() -> Result<std::path::PathBuf, HgError> { |
|
238 | 238 | std::env::current_exe().map_err(|error| HgError::IoError { |
|
239 | 239 | error, |
|
240 | 240 | context: IoErrorContext::CurrentExe, |
|
241 | 241 | }) |
|
242 | 242 | } |
|
243 | 243 | |
|
244 | /// Expand `$FOO` and `${FOO}` environment variables in the given byte string | |
|
245 | pub fn expand_vars(s: &[u8]) -> std::borrow::Cow<[u8]> { | |
|
246 | lazy_static::lazy_static! { | |
|
247 | /// https://github.com/python/cpython/blob/3.9/Lib/posixpath.py#L301 | |
|
248 | /// The `x` makes whitespace ignored. | |
|
249 | /// `-u` disables the Unicode flag, which makes `\w` like Python with the ASCII flag. | |
|
250 | static ref VAR_RE: regex::bytes::Regex = | |
|
251 | regex::bytes::Regex::new(r"(?x-u) | |
|
252 | \$ | |
|
253 | (?: | |
|
254 | (\w+) | |
|
255 | | | |
|
256 | \{ | |
|
257 | ([^}]*) | |
|
258 | \} | |
|
259 | ) | |
|
260 | ").unwrap(); | |
|
261 | } | |
|
262 | VAR_RE.replace_all(s, |captures: ®ex::bytes::Captures| { | |
|
263 | let var_name = files::get_os_str_from_bytes( | |
|
264 | captures | |
|
265 | .get(1) | |
|
266 | .or_else(|| captures.get(2)) | |
|
267 | .expect("either side of `|` must participate in match") | |
|
268 | .as_bytes(), | |
|
269 | ); | |
|
270 | std::env::var_os(var_name) | |
|
271 | .map(files::get_bytes_from_os_str) | |
|
272 | .unwrap_or_else(|| { | |
|
273 | // Referencing an environment variable that does not exist. | |
|
274 | // Leave the $FOO reference as-is. | |
|
275 | captures[0].to_owned() | |
|
276 | }) | |
|
277 | }) | |
|
278 | } | |
|
279 | ||
|
#[test]
fn test_expand_vars() {
    // Modifying process-global state in a test isn’t great,
    // but hopefully this won’t collide with anything.
    std::env::set_var("TEST_EXPAND_VAR", "1");

    // (input, expected expansion)
    let cases: [(&[u8], &[u8]); 3] = [
        (b"before/$TEST_EXPAND_VAR/after", b"before/1/after"),
        (
            b"before${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}${TEST_EXPAND_VAR}after",
            b"before111after",
        ),
        // An unset variable is left untouched
        (
            b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after",
            b"before $SOME_LONG_NAME_THAT_WE_ASSUME_IS_NOT_AN_ACTUAL_ENV_VAR after",
        ),
    ];
    for (input, expected) in cases.iter() {
        assert_eq!(expand_vars(input), *expected);
    }
}
|
296 | ||
|
/// What a merge callback decides for a key that exists in both maps.
pub(crate) enum MergeResult<V> {
    /// Keep the value from the left map
    UseLeftValue,
    /// Keep the value from the right map
    UseRightValue,
    /// Use this value instead of either existing one
    UseNewValue(V),
}
|
249 | 302 | |
|
250 | 303 | /// Return the union of the two given maps, |
|
251 | 304 | /// calling `merge(key, left_value, right_value)` to resolve keys that exist in |
|
252 | 305 | /// both. |
|
253 | 306 | /// |
|
254 | 307 | /// CC https://github.com/bodil/im-rs/issues/166 |
|
255 | 308 | pub(crate) fn ordmap_union_with_merge<K, V>( |
|
256 | 309 | left: OrdMap<K, V>, |
|
257 | 310 | right: OrdMap<K, V>, |
|
258 | 311 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, |
|
259 | 312 | ) -> OrdMap<K, V> |
|
260 | 313 | where |
|
261 | 314 | K: Clone + Ord, |
|
262 | 315 | V: Clone + PartialEq, |
|
263 | 316 | { |
|
264 | 317 | if left.ptr_eq(&right) { |
|
265 | 318 | // One of the two maps is an unmodified clone of the other |
|
266 | 319 | left |
|
267 | 320 | } else if left.len() / 2 > right.len() { |
|
268 | 321 | // When two maps have different sizes, |
|
269 | 322 | // their size difference is a lower bound on |
|
270 | 323 | // how many keys of the larger map are not also in the smaller map. |
|
271 | 324 | // This in turn is a lower bound on the number of differences in |
|
272 | 325 | // `OrdMap::diff` and the "amount of work" that would be done |
|
273 | 326 | // by `ordmap_union_with_merge_by_diff`. |
|
274 | 327 | // |
|
275 | 328 | // Here `left` is more than twice the size of `right`, |
|
276 | 329 | // so the number of differences is more than the total size of |
|
277 | 330 | // `right`. Therefore an algorithm based on iterating `right` |
|
278 | 331 | // is more efficient. |
|
279 | 332 | // |
|
280 | 333 | // This helps a lot when a tiny (or empty) map is merged |
|
281 | 334 | // with a large one. |
|
282 | 335 | ordmap_union_with_merge_by_iter(left, right, merge) |
|
283 | 336 | } else if left.len() < right.len() / 2 { |
|
284 | 337 | // Same as above but with `left` and `right` swapped |
|
285 | 338 | ordmap_union_with_merge_by_iter(right, left, |key, a, b| { |
|
286 | 339 | // Also swapped in `merge` arguments: |
|
287 | 340 | match merge(key, b, a) { |
|
288 | 341 | MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v), |
|
289 | 342 | // β¦ and swap back in `merge` result: |
|
290 | 343 | MergeResult::UseLeftValue => MergeResult::UseRightValue, |
|
291 | 344 | MergeResult::UseRightValue => MergeResult::UseLeftValue, |
|
292 | 345 | } |
|
293 | 346 | }) |
|
294 | 347 | } else { |
|
295 | 348 | // For maps of similar size, use the algorithm based on `OrdMap::diff` |
|
296 | 349 | ordmap_union_with_merge_by_diff(left, right, merge) |
|
297 | 350 | } |
|
298 | 351 | } |
|
299 | 352 | |
|
300 | 353 | /// Efficient if `right` is much smaller than `left` |
|
301 | 354 | fn ordmap_union_with_merge_by_iter<K, V>( |
|
302 | 355 | mut left: OrdMap<K, V>, |
|
303 | 356 | right: OrdMap<K, V>, |
|
304 | 357 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, |
|
305 | 358 | ) -> OrdMap<K, V> |
|
306 | 359 | where |
|
307 | 360 | K: Clone + Ord, |
|
308 | 361 | V: Clone, |
|
309 | 362 | { |
|
310 | 363 | for (key, right_value) in right { |
|
311 | 364 | match left.get(&key) { |
|
312 | 365 | None => { |
|
313 | 366 | left.insert(key, right_value); |
|
314 | 367 | } |
|
315 | 368 | Some(left_value) => match merge(&key, left_value, &right_value) { |
|
316 | 369 | MergeResult::UseLeftValue => {} |
|
317 | 370 | MergeResult::UseRightValue => { |
|
318 | 371 | left.insert(key, right_value); |
|
319 | 372 | } |
|
320 | 373 | MergeResult::UseNewValue(new_value) => { |
|
321 | 374 | left.insert(key, new_value); |
|
322 | 375 | } |
|
323 | 376 | }, |
|
324 | 377 | } |
|
325 | 378 | } |
|
326 | 379 | left |
|
327 | 380 | } |
|
328 | 381 | |
|
329 | 382 | /// Fallback when both maps are of similar size |
|
330 | 383 | fn ordmap_union_with_merge_by_diff<K, V>( |
|
331 | 384 | mut left: OrdMap<K, V>, |
|
332 | 385 | mut right: OrdMap<K, V>, |
|
333 | 386 | mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>, |
|
334 | 387 | ) -> OrdMap<K, V> |
|
335 | 388 | where |
|
336 | 389 | K: Clone + Ord, |
|
337 | 390 | V: Clone + PartialEq, |
|
338 | 391 | { |
|
339 | 392 | // (key, value) pairs that would need to be inserted in either map |
|
340 | 393 | // in order to turn it into the union. |
|
341 | 394 | // |
|
342 | 395 | // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted, |
|
343 | 396 | // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>` |
|
344 | 397 | // with `left_updates` only borrowing from `right` and `right_updates` from |
|
345 | 398 | // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`. |
|
346 | 399 | // |
|
347 | 400 | // This would allow moving all `.clone()` calls to after weβve decided |
|
348 | 401 | // which of `right_updates` or `left_updates` to use |
|
349 | 402 | // (value ones becoming `Cow::into_owned`), |
|
350 | 403 | // and avoid making clones we donβt end up using. |
|
351 | 404 | let mut left_updates = Vec::new(); |
|
352 | 405 | let mut right_updates = Vec::new(); |
|
353 | 406 | |
|
354 | 407 | for difference in left.diff(&right) { |
|
355 | 408 | match difference { |
|
356 | 409 | DiffItem::Add(key, value) => { |
|
357 | 410 | left_updates.push((key.clone(), value.clone())) |
|
358 | 411 | } |
|
359 | 412 | DiffItem::Remove(key, value) => { |
|
360 | 413 | right_updates.push((key.clone(), value.clone())) |
|
361 | 414 | } |
|
362 | 415 | DiffItem::Update { |
|
363 | 416 | old: (key, left_value), |
|
364 | 417 | new: (_, right_value), |
|
365 | 418 | } => match merge(key, left_value, right_value) { |
|
366 | 419 | MergeResult::UseLeftValue => { |
|
367 | 420 | right_updates.push((key.clone(), left_value.clone())) |
|
368 | 421 | } |
|
369 | 422 | MergeResult::UseRightValue => { |
|
370 | 423 | left_updates.push((key.clone(), right_value.clone())) |
|
371 | 424 | } |
|
372 | 425 | MergeResult::UseNewValue(new_value) => { |
|
373 | 426 | left_updates.push((key.clone(), new_value.clone())); |
|
374 | 427 | right_updates.push((key.clone(), new_value)) |
|
375 | 428 | } |
|
376 | 429 | }, |
|
377 | 430 | } |
|
378 | 431 | } |
|
379 | 432 | if left_updates.len() < right_updates.len() { |
|
380 | 433 | for (key, value) in left_updates { |
|
381 | 434 | left.insert(key, value); |
|
382 | 435 | } |
|
383 | 436 | left |
|
384 | 437 | } else { |
|
385 | 438 | for (key, value) in right_updates { |
|
386 | 439 | right.insert(key, value); |
|
387 | 440 | } |
|
388 | 441 | right |
|
389 | 442 | } |
|
390 | 443 | } |
|
391 | 444 | |
|
/// Join items of the iterable with the given separator, similar to Python’s
/// `separator.join(iter)`.
///
/// Formatting the return value consumes the iterator.
/// Formatting it again will produce an empty string.
pub fn join_display(
    iter: impl IntoIterator<Item = impl fmt::Display>,
    separator: impl fmt::Display,
) -> impl fmt::Display {
    let items = iter.into_iter();
    JoinDisplay {
        iter: Cell::new(Some(items)),
        separator,
    }
}

/// Holds the not-yet-formatted items and what to write between them.
struct JoinDisplay<I, S> {
    /// `None` once the items have been formatted
    iter: Cell<Option<I>>,
    separator: S,
}

impl<I, T, S> fmt::Display for JoinDisplay<I, S>
where
    I: Iterator<Item = T>,
    T: fmt::Display,
    S: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Taking the iterator out of the cell leaves `None` behind, which is
        // why a second formatting writes nothing.
        let mut items = match self.iter.take() {
            Some(items) => items,
            None => return Ok(()),
        };
        if let Some(head) = items.next() {
            head.fmt(f)?;
        }
        // Every following item is preceded by the separator
        items.try_for_each(|item| {
            self.separator.fmt(f)?;
            item.fmt(f)
        })
    }
}
@@ -1,448 +1,451 | |||
|
1 | 1 | // files.rs |
|
2 | 2 | // |
|
3 | 3 | // Copyright 2019 |
|
4 | 4 | // Raphaël Gomès <rgomes@octobus.net>, |
|
5 | 5 | // Yuya Nishihara <yuya@tcha.org> |
|
6 | 6 | // |
|
7 | 7 | // This software may be used and distributed according to the terms of the |
|
8 | 8 | // GNU General Public License version 2 or any later version. |
|
9 | 9 | |
|
10 | 10 | //! Functions for fiddling with files. |
|
11 | 11 | |
|
12 | 12 | use crate::utils::{ |
|
13 | 13 | hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError}, |
|
14 | 14 | path_auditor::PathAuditor, |
|
15 | 15 | replace_slice, |
|
16 | 16 | }; |
|
17 | 17 | use lazy_static::lazy_static; |
|
18 | 18 | use same_file::is_same_file; |
|
19 | 19 | use std::borrow::{Cow, ToOwned}; |
|
20 | 20 | use std::ffi::OsStr; |
|
21 | 21 | use std::fs::Metadata; |
|
22 | 22 | use std::iter::FusedIterator; |
|
23 | 23 | use std::ops::Deref; |
|
24 | 24 | use std::path::{Path, PathBuf}; |
|
25 | 25 | |
|
/// Reinterprets raw bytes as an `OsStr` without copying.
///
/// Only implemented for Unix so far.
pub fn get_os_str_from_bytes(bytes: &[u8]) -> &OsStr {
    #[cfg(unix)]
    let converted = {
        use std::os::unix::ffi::OsStrExt;
        OsStr::from_bytes(bytes)
    };
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
    // Perhaps, the return type would have to be Result<PathBuf>.
    converted
}

/// Reinterprets raw bytes as a `Path` without copying, going through
/// `get_os_str_from_bytes`.
pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
    let os_str = get_os_str_from_bytes(bytes);
    os_str.as_ref()
}
|
39 | 42 | |
|
40 | 43 | // TODO: need to convert from WTF8 to MBCS bytes on Windows. |
|
41 | 44 | // that's why Vec<u8> is returned. |
|
42 | 45 | #[cfg(unix)] |
|
43 | 46 | pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> { |
|
44 | 47 | get_bytes_from_os_str(path.as_ref()) |
|
45 | 48 | } |
|
46 | 49 | |
|
/// Returns an owned copy of the bytes making up `str`.
#[cfg(unix)]
pub fn get_bytes_from_os_str(str: impl AsRef<OsStr>) -> Vec<u8> {
    use std::os::unix::ffi::OsStrExt;
    let raw: &[u8] = str.as_ref().as_bytes();
    raw.to_owned()
}
|
52 | 55 | |
|
53 | 56 | /// An iterator over repository path yielding itself and its ancestors. |
|
54 | 57 | #[derive(Copy, Clone, Debug)] |
|
55 | 58 | pub struct Ancestors<'a> { |
|
56 | 59 | next: Option<&'a HgPath>, |
|
57 | 60 | } |
|
58 | 61 | |
|
59 | 62 | impl<'a> Iterator for Ancestors<'a> { |
|
60 | 63 | type Item = &'a HgPath; |
|
61 | 64 | |
|
62 | 65 | fn next(&mut self) -> Option<Self::Item> { |
|
63 | 66 | let next = self.next; |
|
64 | 67 | self.next = match self.next { |
|
65 | 68 | Some(s) if s.is_empty() => None, |
|
66 | 69 | Some(s) => { |
|
67 | 70 | let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0); |
|
68 | 71 | Some(HgPath::new(&s.as_bytes()[..p])) |
|
69 | 72 | } |
|
70 | 73 | None => None, |
|
71 | 74 | }; |
|
72 | 75 | next |
|
73 | 76 | } |
|
74 | 77 | } |
|
75 | 78 | |
|
76 | 79 | impl<'a> FusedIterator for Ancestors<'a> {} |
|
77 | 80 | |
|
78 | 81 | /// An iterator over repository path yielding itself and its ancestors. |
|
79 | 82 | #[derive(Copy, Clone, Debug)] |
|
80 | 83 | pub(crate) struct AncestorsWithBase<'a> { |
|
81 | 84 | next: Option<(&'a HgPath, &'a HgPath)>, |
|
82 | 85 | } |
|
83 | 86 | |
|
84 | 87 | impl<'a> Iterator for AncestorsWithBase<'a> { |
|
85 | 88 | type Item = (&'a HgPath, &'a HgPath); |
|
86 | 89 | |
|
87 | 90 | fn next(&mut self) -> Option<Self::Item> { |
|
88 | 91 | let next = self.next; |
|
89 | 92 | self.next = match self.next { |
|
90 | 93 | Some((s, _)) if s.is_empty() => None, |
|
91 | 94 | Some((s, _)) => Some(s.split_filename()), |
|
92 | 95 | None => None, |
|
93 | 96 | }; |
|
94 | 97 | next |
|
95 | 98 | } |
|
96 | 99 | } |
|
97 | 100 | |
|
98 | 101 | impl<'a> FusedIterator for AncestorsWithBase<'a> {} |
|
99 | 102 | |
|
100 | 103 | /// Returns an iterator yielding ancestor directories of the given repository |
|
101 | 104 | /// path. |
|
102 | 105 | /// |
|
103 | 106 | /// The path is separated by '/', and must not start with '/'. |
|
104 | 107 | /// |
|
105 | 108 | /// The path itself isn't included unless it is b"" (meaning the root |
|
106 | 109 | /// directory.) |
|
107 | 110 | pub fn find_dirs(path: &HgPath) -> Ancestors { |
|
108 | 111 | let mut dirs = Ancestors { next: Some(path) }; |
|
109 | 112 | if !path.is_empty() { |
|
110 | 113 | dirs.next(); // skip itself |
|
111 | 114 | } |
|
112 | 115 | dirs |
|
113 | 116 | } |
|
114 | 117 | |
|
115 | 118 | /// Returns an iterator yielding ancestor directories of the given repository |
|
116 | 119 | /// path. |
|
117 | 120 | /// |
|
118 | 121 | /// The path is separated by '/', and must not start with '/'. |
|
119 | 122 | /// |
|
120 | 123 | /// The path itself isn't included unless it is b"" (meaning the root |
|
121 | 124 | /// directory.) |
|
122 | 125 | pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase { |
|
123 | 126 | let mut dirs = AncestorsWithBase { |
|
124 | 127 | next: Some((path, HgPath::new(b""))), |
|
125 | 128 | }; |
|
126 | 129 | if !path.is_empty() { |
|
127 | 130 | dirs.next(); // skip itself |
|
128 | 131 | } |
|
129 | 132 | dirs |
|
130 | 133 | } |
|
131 | 134 | |
|
132 | 135 | /// TODO more than ASCII? |
|
133 | 136 | pub fn normalize_case(path: &HgPath) -> HgPathBuf { |
|
134 | 137 | #[cfg(windows)] // NTFS compares via upper() |
|
135 | 138 | return path.to_ascii_uppercase(); |
|
136 | 139 | #[cfg(unix)] |
|
137 | 140 | path.to_ascii_lowercase() |
|
138 | 141 | } |
|
139 | 142 | |
|
140 | 143 | lazy_static! { |
|
141 | 144 | static ref IGNORED_CHARS: Vec<Vec<u8>> = { |
|
142 | 145 | [ |
|
143 | 146 | 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d, |
|
144 | 147 | 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff, |
|
145 | 148 | ] |
|
146 | 149 | .iter() |
|
147 | 150 | .map(|code| { |
|
148 | 151 | std::char::from_u32(*code) |
|
149 | 152 | .unwrap() |
|
150 | 153 | .encode_utf8(&mut [0; 3]) |
|
151 | 154 | .bytes() |
|
152 | 155 | .collect() |
|
153 | 156 | }) |
|
154 | 157 | .collect() |
|
155 | 158 | }; |
|
156 | 159 | } |
|
157 | 160 | |
|
158 | 161 | fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> { |
|
159 | 162 | let mut buf = bytes.to_owned(); |
|
160 | 163 | let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef'); |
|
161 | 164 | if needs_escaping { |
|
162 | 165 | for forbidden in IGNORED_CHARS.iter() { |
|
163 | 166 | replace_slice(&mut buf, forbidden, &[]) |
|
164 | 167 | } |
|
165 | 168 | buf |
|
166 | 169 | } else { |
|
167 | 170 | buf |
|
168 | 171 | } |
|
169 | 172 | } |
|
170 | 173 | |
|
171 | 174 | pub fn lower_clean(bytes: &[u8]) -> Vec<u8> { |
|
172 | 175 | hfs_ignore_clean(&bytes.to_ascii_lowercase()) |
|
173 | 176 | } |
|
174 | 177 | |
|
/// Plain-data snapshot of selected fields of a file's `Metadata`.
///
/// `Debug` is derived so values can be logged and used in `assert_eq!`.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
pub struct HgMetadata {
    pub st_dev: u64,
    pub st_mode: u32,
    pub st_nlink: u64,
    pub st_size: u64,
    pub st_mtime: i64,
    pub st_ctime: i64,
}

// TODO support other platforms
#[cfg(unix)]
impl HgMetadata {
    /// Copies the corresponding Unix fields out of `metadata`.
    pub fn from_metadata(metadata: Metadata) -> Self {
        use std::os::unix::fs::MetadataExt;
        Self {
            st_dev: metadata.dev(),
            st_mode: metadata.mode(),
            st_nlink: metadata.nlink(),
            st_size: metadata.size(),
            st_mtime: metadata.mtime(),
            st_ctime: metadata.ctime(),
        }
    }
}
|
200 | 203 | |
|
201 | 204 | /// Returns the canonical path of `name`, given `cwd` and `root` |
|
202 | 205 | pub fn canonical_path( |
|
203 | 206 | root: impl AsRef<Path>, |
|
204 | 207 | cwd: impl AsRef<Path>, |
|
205 | 208 | name: impl AsRef<Path>, |
|
206 | 209 | ) -> Result<PathBuf, HgPathError> { |
|
207 | 210 | // TODO add missing normalization for other platforms |
|
208 | 211 | let root = root.as_ref(); |
|
209 | 212 | let cwd = cwd.as_ref(); |
|
210 | 213 | let name = name.as_ref(); |
|
211 | 214 | |
|
212 | 215 | let name = if !name.is_absolute() { |
|
213 | 216 | root.join(&cwd).join(&name) |
|
214 | 217 | } else { |
|
215 | 218 | name.to_owned() |
|
216 | 219 | }; |
|
217 | 220 | let auditor = PathAuditor::new(&root); |
|
218 | 221 | if name != root && name.starts_with(&root) { |
|
219 | 222 | let name = name.strip_prefix(&root).unwrap(); |
|
220 | 223 | auditor.audit_path(path_to_hg_path_buf(name)?)?; |
|
221 | 224 | Ok(name.to_owned()) |
|
222 | 225 | } else if name == root { |
|
223 | 226 | Ok("".into()) |
|
224 | 227 | } else { |
|
225 | 228 | // Determine whether `name' is in the hierarchy at or beneath `root', |
|
226 | 229 | // by iterating name=name.parent() until it returns `None` (can't |
|
227 | 230 | // check name == '/', because that doesn't work on windows). |
|
228 | 231 | let mut name = name.deref(); |
|
229 | 232 | let original_name = name.to_owned(); |
|
230 | 233 | loop { |
|
231 | 234 | let same = is_same_file(&name, &root).unwrap_or(false); |
|
232 | 235 | if same { |
|
233 | 236 | if name == original_name { |
|
234 | 237 | // `name` was actually the same as root (maybe a symlink) |
|
235 | 238 | return Ok("".into()); |
|
236 | 239 | } |
|
237 | 240 | // `name` is a symlink to root, so `original_name` is under |
|
238 | 241 | // root |
|
239 | 242 | let rel_path = original_name.strip_prefix(&name).unwrap(); |
|
240 | 243 | auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?; |
|
241 | 244 | return Ok(rel_path.to_owned()); |
|
242 | 245 | } |
|
243 | 246 | name = match name.parent() { |
|
244 | 247 | None => break, |
|
245 | 248 | Some(p) => p, |
|
246 | 249 | }; |
|
247 | 250 | } |
|
248 | 251 | // TODO hint to the user about using --cwd |
|
249 | 252 | // Bubble up the responsibility to Python for now |
|
250 | 253 | Err(HgPathError::NotUnderRoot { |
|
251 | 254 | path: original_name.to_owned(), |
|
252 | 255 | root: root.to_owned(), |
|
253 | 256 | }) |
|
254 | 257 | } |
|
255 | 258 | } |
|
256 | 259 | |
|
257 | 260 | /// Returns the representation of the path relative to the current working |
|
258 | 261 | /// directory for display purposes. |
|
259 | 262 | /// |
|
260 | 263 | /// `cwd` is a `HgPath`, so it is considered relative to the root directory |
|
261 | 264 | /// of the repository. |
|
262 | 265 | /// |
|
263 | 266 | /// # Examples |
|
264 | 267 | /// |
|
265 | 268 | /// ``` |
|
266 | 269 | /// use hg::utils::hg_path::HgPath; |
|
267 | 270 | /// use hg::utils::files::relativize_path; |
|
268 | 271 | /// use std::borrow::Cow; |
|
269 | 272 | /// |
|
270 | 273 | /// let file = HgPath::new(b"nested/file"); |
|
271 | 274 | /// let cwd = HgPath::new(b""); |
|
272 | 275 | /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"nested/file")); |
|
273 | 276 | /// |
|
274 | 277 | /// let cwd = HgPath::new(b"nested"); |
|
275 | 278 | /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"file")); |
|
276 | 279 | /// |
|
277 | 280 | /// let cwd = HgPath::new(b"other"); |
|
278 | 281 | /// assert_eq!(relativize_path(file, cwd), Cow::Borrowed(b"../nested/file")); |
|
279 | 282 | /// ``` |
|
280 | 283 | pub fn relativize_path(path: &HgPath, cwd: impl AsRef<HgPath>) -> Cow<[u8]> { |
|
281 | 284 | if cwd.as_ref().is_empty() { |
|
282 | 285 | Cow::Borrowed(path.as_bytes()) |
|
283 | 286 | } else { |
|
284 | 287 | let mut res: Vec<u8> = Vec::new(); |
|
285 | 288 | let mut path_iter = path.as_bytes().split(|b| *b == b'/').peekable(); |
|
286 | 289 | let mut cwd_iter = |
|
287 | 290 | cwd.as_ref().as_bytes().split(|b| *b == b'/').peekable(); |
|
288 | 291 | loop { |
|
289 | 292 | match (path_iter.peek(), cwd_iter.peek()) { |
|
290 | 293 | (Some(a), Some(b)) if a == b => (), |
|
291 | 294 | _ => break, |
|
292 | 295 | } |
|
293 | 296 | path_iter.next(); |
|
294 | 297 | cwd_iter.next(); |
|
295 | 298 | } |
|
296 | 299 | let mut need_sep = false; |
|
297 | 300 | for _ in cwd_iter { |
|
298 | 301 | if need_sep { |
|
299 | 302 | res.extend(b"/") |
|
300 | 303 | } else { |
|
301 | 304 | need_sep = true |
|
302 | 305 | }; |
|
303 | 306 | res.extend(b".."); |
|
304 | 307 | } |
|
305 | 308 | for c in path_iter { |
|
306 | 309 | if need_sep { |
|
307 | 310 | res.extend(b"/") |
|
308 | 311 | } else { |
|
309 | 312 | need_sep = true |
|
310 | 313 | }; |
|
311 | 314 | res.extend(c); |
|
312 | 315 | } |
|
313 | 316 | Cow::Owned(res) |
|
314 | 317 | } |
|
315 | 318 | } |
|
316 | 319 | |
|
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Shorthand for building a `&HgPath` from a byte literal.
    fn hg(bytes: &[u8]) -> &HgPath {
        HgPath::new(bytes)
    }

    #[test]
    fn find_dirs_some() {
        let mut dirs = super::find_dirs(hg(b"foo/bar/baz"));
        let expected = [hg(b"foo/bar"), hg(b"foo"), hg(b"")];
        for dir in expected.iter() {
            assert_eq!(dirs.next(), Some(*dir));
        }
        // Exhausted, and stays exhausted.
        assert_eq!(dirs.next(), None);
        assert_eq!(dirs.next(), None);
    }

    #[test]
    fn find_dirs_empty() {
        // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
        let mut dirs = super::find_dirs(hg(b""));
        assert_eq!(dirs.next(), Some(hg(b"")));
        assert_eq!(dirs.next(), None);
        assert_eq!(dirs.next(), None);
    }

    #[test]
    fn test_find_dirs_with_base_some() {
        let mut dirs = super::find_dirs_with_base(hg(b"foo/bar/baz"));
        let expected = [
            (hg(b"foo/bar"), hg(b"baz")),
            (hg(b"foo"), hg(b"bar")),
            (hg(b""), hg(b"foo")),
        ];
        for pair in expected.iter() {
            assert_eq!(dirs.next(), Some(*pair));
        }
        // Exhausted, and stays exhausted.
        assert_eq!(dirs.next(), None);
        assert_eq!(dirs.next(), None);
    }

    #[test]
    fn test_find_dirs_with_base_empty() {
        let mut dirs = super::find_dirs_with_base(hg(b""));
        assert_eq!(dirs.next(), Some((hg(b""), hg(b""))));
        assert_eq!(dirs.next(), None);
        assert_eq!(dirs.next(), None);
    }

    #[test]
    fn test_canonical_path() {
        let root = Path::new("/repo");
        let not_under_root = |path: &str| -> Result<PathBuf, HgPathError> {
            Err(HgPathError::NotUnderRoot {
                path: PathBuf::from(path),
                root: root.to_path_buf(),
            })
        };

        // (cwd, name, expected result), all against the root "/repo"
        let cases = vec![
            ("/dir", "filename", not_under_root("/dir/filename")),
            ("/", "filename", not_under_root("/filename")),
            ("/", "repo/filename", Ok(PathBuf::from("filename"))),
            ("/repo", "filename", Ok(PathBuf::from("filename"))),
            (
                "/repo/subdir",
                "filename",
                Ok(PathBuf::from("subdir/filename")),
            ),
        ];
        for (cwd, name, expected) in cases {
            assert_eq!(
                canonical_path(root, Path::new(cwd), Path::new(name)),
                expected
            );
        }
    }

    #[test]
    fn test_canonical_path_not_rooted() {
        use std::fs::create_dir;
        use tempfile::tempdir;

        // Layout: <tmp>/a/b is the repository root, <tmp>/c is a symlink
        // pointing at it.
        let base_dir = tempdir().unwrap();
        let base_dir_path = base_dir.path();
        let beneath_repo = base_dir_path.join("a");
        let root = base_dir_path.join("a/b");
        let out_of_repo = base_dir_path.join("c");
        let under_repo_symlink = out_of_repo.join("d");

        create_dir(&beneath_repo).unwrap();
        create_dir(&root).unwrap();

        // TODO make portable
        std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();

        let no_cwd = Path::new("");

        // The symlink itself resolves to the root.
        let through_symlink = canonical_path(&root, no_cwd, out_of_repo);
        assert_eq!(through_symlink, Ok(PathBuf::from("")));

        // The parent of the root is outside the repository.
        let outside = canonical_path(&root, no_cwd, &beneath_repo);
        assert_eq!(
            outside,
            Err(HgPathError::NotUnderRoot {
                path: beneath_repo.to_owned(),
                root: root.to_owned()
            })
        );

        // A path below the symlink is relative to the root.
        let below_symlink = canonical_path(&root, no_cwd, &under_repo_symlink);
        assert_eq!(below_symlink, Ok(PathBuf::from("d")));
    }
}
General Comments 0
You need to be logged in to leave comments.
Login now