Show More
@@ -0,0 +1,409 b'' | |||||
|
1 | // hg_path.rs | |||
|
2 | // | |||
|
3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | |||
|
4 | // | |||
|
5 | // This software may be used and distributed according to the terms of the | |||
|
6 | // GNU General Public License version 2 or any later version. | |||
|
7 | ||||
|
8 | use std::borrow::Borrow; | |||
|
9 | use std::ffi::{OsStr, OsString}; | |||
|
10 | use std::ops::Deref; | |||
|
11 | use std::path::{Path, PathBuf}; | |||
|
12 | ||||
|
/// Reasons why the bytes of an `HgPath` are not a well-formed path.
///
/// Every variant carries a copy of the offending bytes so that the error can
/// be reported without keeping the original path alive.
#[derive(Debug, Eq, PartialEq)]
pub enum HgPathError {
    /// Bytes from the invalid `HgPath`
    LeadingSlash(Vec<u8>),
    /// Bytes and index of the second slash
    ConsecutiveSlashes(Vec<u8>, usize),
    /// Bytes and index of the null byte
    ContainsNullByte(Vec<u8>, usize),
    /// Bytes
    DecodeError(Vec<u8>),
}

/// Human-readable description of the error.
///
/// Implemented through `Display` rather than `ToString` directly: the
/// standard blanket impl still provides `to_string()`, and the error can now
/// also be used with `{}` in format strings.
impl std::fmt::Display for HgPathError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            HgPathError::LeadingSlash(bytes) => {
                write!(f, "Invalid HgPath '{:?}': has a leading slash.", bytes)
            }
            HgPathError::ConsecutiveSlashes(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
                bytes, pos
            ),
            HgPathError::ContainsNullByte(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': contains null byte at pos {}.",
                bytes, pos
            ),
            HgPathError::DecodeError(bytes) => {
                write!(f, "Invalid HgPath '{:?}': could not be decoded.", bytes)
            }
        }
    }
}
|
45 | ||||
|
46 | impl From<HgPathError> for std::io::Error { | |||
|
47 | fn from(e: HgPathError) -> Self { | |||
|
48 | std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()) | |||
|
49 | } | |||
|
50 | } | |||
|
51 | ||||
|
/// This is a repository-relative path (or canonical path):
///     - no null characters
///     - `/` separates directories
///     - no consecutive slashes
///     - no leading slash,
///     - no `.` nor `..` of special meaning
///     - stored in repository and shared across platforms
///
/// Note: there is no guarantee of any `HgPath` being well-formed at any point
/// in its lifetime for performance reasons and to ease ergonomics. It is
/// however checked using the `check_state` method before any file-system
/// operation.
///
/// This allows us to be encoding-transparent as much as possible, until really
/// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
/// or `Path`) whenever more complex operations are needed:
/// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
/// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
/// character encoding will be determined on a per-repository basis.
//
// `HgPath::new()` casts a `*const [u8]` to a `*const HgPath`, which is only
// sound if `HgPath` is layout-compatible with `[u8]`. `#[repr(transparent)]`
// makes that a guarantee instead of an implementation detail of the
// compiler, so it must not be removed while that cast exists.
#[derive(Eq, Ord, PartialEq, PartialOrd, Debug, Hash)]
#[repr(transparent)]
pub struct HgPath {
    inner: [u8],
}
|
83 | ||||
|
84 | impl HgPath { | |||
|
85 | pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self { | |||
|
86 | unsafe { &*(s.as_ref() as *const [u8] as *const Self) } | |||
|
87 | } | |||
|
88 | pub fn is_empty(&self) -> bool { | |||
|
89 | self.inner.is_empty() | |||
|
90 | } | |||
|
91 | pub fn len(&self) -> usize { | |||
|
92 | self.inner.len() | |||
|
93 | } | |||
|
94 | fn to_hg_path_buf(&self) -> HgPathBuf { | |||
|
95 | HgPathBuf { | |||
|
96 | inner: self.inner.to_owned(), | |||
|
97 | } | |||
|
98 | } | |||
|
99 | pub fn bytes(&self) -> std::slice::Iter<u8> { | |||
|
100 | self.inner.iter() | |||
|
101 | } | |||
|
102 | pub fn to_ascii_uppercase(&self) -> HgPathBuf { | |||
|
103 | HgPathBuf::from(self.inner.to_ascii_uppercase()) | |||
|
104 | } | |||
|
105 | pub fn to_ascii_lowercase(&self) -> HgPathBuf { | |||
|
106 | HgPathBuf::from(self.inner.to_ascii_lowercase()) | |||
|
107 | } | |||
|
108 | pub fn as_bytes(&self) -> &[u8] { | |||
|
109 | &self.inner | |||
|
110 | } | |||
|
111 | pub fn contains(&self, other: u8) -> bool { | |||
|
112 | self.inner.contains(&other) | |||
|
113 | } | |||
|
114 | pub fn join<T: ?Sized + AsRef<HgPath>>(&self, other: &T) -> HgPathBuf { | |||
|
115 | let mut inner = self.inner.to_owned(); | |||
|
116 | if inner.len() != 0 && inner.last() != Some(&b'/') { | |||
|
117 | inner.push(b'/'); | |||
|
118 | } | |||
|
119 | inner.extend(other.as_ref().bytes()); | |||
|
120 | HgPathBuf::from_bytes(&inner) | |||
|
121 | } | |||
|
122 | /// Checks for errors in the path, short-circuiting at the first one. | |||
|
123 | /// This generates fine-grained errors useful for debugging. | |||
|
124 | /// To simply check if the path is valid during tests, use `is_valid`. | |||
|
125 | pub fn check_state(&self) -> Result<(), HgPathError> { | |||
|
126 | if self.len() == 0 { | |||
|
127 | return Ok(()); | |||
|
128 | } | |||
|
129 | let bytes = self.as_bytes(); | |||
|
130 | let mut previous_byte = None; | |||
|
131 | ||||
|
132 | if bytes[0] == b'/' { | |||
|
133 | return Err(HgPathError::LeadingSlash(bytes.to_vec())); | |||
|
134 | } | |||
|
135 | for (index, byte) in bytes.iter().enumerate() { | |||
|
136 | match byte { | |||
|
137 | 0 => { | |||
|
138 | return Err(HgPathError::ContainsNullByte( | |||
|
139 | bytes.to_vec(), | |||
|
140 | index, | |||
|
141 | )) | |||
|
142 | } | |||
|
143 | b'/' => { | |||
|
144 | if previous_byte.is_some() && previous_byte == Some(b'/') { | |||
|
145 | return Err(HgPathError::ConsecutiveSlashes( | |||
|
146 | bytes.to_vec(), | |||
|
147 | index, | |||
|
148 | )); | |||
|
149 | } | |||
|
150 | } | |||
|
151 | _ => (), | |||
|
152 | }; | |||
|
153 | previous_byte = Some(*byte); | |||
|
154 | } | |||
|
155 | Ok(()) | |||
|
156 | } | |||
|
157 | ||||
|
158 | #[cfg(test)] | |||
|
159 | /// Only usable during tests to force developers to handle invalid states | |||
|
160 | fn is_valid(&self) -> bool { | |||
|
161 | self.check_state().is_ok() | |||
|
162 | } | |||
|
163 | } | |||
|
164 | ||||
|
/// Owned, growable counterpart of `HgPath`, backed by a `Vec<u8>`.
///
/// `Default` yields an empty buffer, like `HgPathBuf::new()`.
#[derive(Eq, Ord, Clone, PartialEq, PartialOrd, Debug, Hash, Default)]
pub struct HgPathBuf {
    inner: Vec<u8>,
}
|
169 | ||||
|
170 | impl HgPathBuf { | |||
|
171 | pub fn new() -> Self { | |||
|
172 | Self { inner: Vec::new() } | |||
|
173 | } | |||
|
174 | pub fn push(&mut self, byte: u8) { | |||
|
175 | self.inner.push(byte); | |||
|
176 | } | |||
|
177 | pub fn from_bytes(s: &[u8]) -> HgPathBuf { | |||
|
178 | HgPath::new(s).to_owned() | |||
|
179 | } | |||
|
180 | pub fn into_vec(self) -> Vec<u8> { | |||
|
181 | self.inner | |||
|
182 | } | |||
|
183 | pub fn as_ref(&self) -> &[u8] { | |||
|
184 | self.inner.as_ref() | |||
|
185 | } | |||
|
186 | } | |||
|
187 | ||||
|
188 | impl Deref for HgPathBuf { | |||
|
189 | type Target = HgPath; | |||
|
190 | ||||
|
191 | #[inline] | |||
|
192 | fn deref(&self) -> &HgPath { | |||
|
193 | &HgPath::new(&self.inner) | |||
|
194 | } | |||
|
195 | } | |||
|
196 | ||||
|
197 | impl From<Vec<u8>> for HgPathBuf { | |||
|
198 | fn from(vec: Vec<u8>) -> Self { | |||
|
199 | Self { inner: vec } | |||
|
200 | } | |||
|
201 | } | |||
|
202 | ||||
|
203 | impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf { | |||
|
204 | fn from(s: &T) -> HgPathBuf { | |||
|
205 | s.as_ref().to_owned() | |||
|
206 | } | |||
|
207 | } | |||
|
208 | ||||
|
209 | impl Into<Vec<u8>> for HgPathBuf { | |||
|
210 | fn into(self) -> Vec<u8> { | |||
|
211 | self.inner | |||
|
212 | } | |||
|
213 | } | |||
|
214 | ||||
|
215 | impl Borrow<HgPath> for HgPathBuf { | |||
|
216 | fn borrow(&self) -> &HgPath { | |||
|
217 | &HgPath::new(self.as_bytes()) | |||
|
218 | } | |||
|
219 | } | |||
|
220 | ||||
|
221 | impl ToOwned for HgPath { | |||
|
222 | type Owned = HgPathBuf; | |||
|
223 | ||||
|
224 | fn to_owned(&self) -> HgPathBuf { | |||
|
225 | self.to_hg_path_buf() | |||
|
226 | } | |||
|
227 | } | |||
|
228 | ||||
|
229 | impl AsRef<HgPath> for HgPath { | |||
|
230 | fn as_ref(&self) -> &HgPath { | |||
|
231 | self | |||
|
232 | } | |||
|
233 | } | |||
|
234 | ||||
|
235 | impl AsRef<HgPath> for HgPathBuf { | |||
|
236 | fn as_ref(&self) -> &HgPath { | |||
|
237 | self | |||
|
238 | } | |||
|
239 | } | |||
|
240 | ||||
|
241 | impl Extend<u8> for HgPathBuf { | |||
|
242 | fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) { | |||
|
243 | self.inner.extend(iter); | |||
|
244 | } | |||
|
245 | } | |||
|
246 | ||||
|
247 | /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is | |||
|
248 | /// implemented, these conversion utils will have to work differently depending | |||
|
249 | /// on the repository encoding: either `UTF-8` or `MBCS`. | |||
|
250 | ||||
|
251 | pub fn hg_path_to_os_string<P: AsRef<HgPath>>( | |||
|
252 | hg_path: P, | |||
|
253 | ) -> Result<OsString, HgPathError> { | |||
|
254 | hg_path.as_ref().check_state()?; | |||
|
255 | let os_str; | |||
|
256 | #[cfg(unix)] | |||
|
257 | { | |||
|
258 | use std::os::unix::ffi::OsStrExt; | |||
|
259 | os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes()); | |||
|
260 | } | |||
|
261 | #[cfg(windows)] | |||
|
262 | { | |||
|
263 | // TODO: convert from Windows MBCS (ANSI encoding) to WTF8. | |||
|
264 | unimplemented!(); | |||
|
265 | } | |||
|
266 | Ok(os_str.to_os_string()) | |||
|
267 | } | |||
|
268 | ||||
|
269 | pub fn hg_path_to_path_buf<P: AsRef<HgPath>>( | |||
|
270 | hg_path: P, | |||
|
271 | ) -> Result<PathBuf, HgPathError> { | |||
|
272 | Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf()) | |||
|
273 | } | |||
|
274 | ||||
|
275 | pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>( | |||
|
276 | os_string: S, | |||
|
277 | ) -> Result<HgPathBuf, HgPathError> { | |||
|
278 | let buf; | |||
|
279 | #[cfg(unix)] | |||
|
280 | { | |||
|
281 | use std::os::unix::ffi::OsStrExt; | |||
|
282 | buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes()); | |||
|
283 | } | |||
|
284 | #[cfg(windows)] | |||
|
285 | { | |||
|
286 | // TODO: convert from WTF8 to Windows MBCS (ANSI encoding). | |||
|
287 | unimplemented!(); | |||
|
288 | } | |||
|
289 | buf.check_state()?; | |||
|
290 | Ok(buf) | |||
|
291 | } | |||
|
292 | ||||
|
293 | pub fn path_to_hg_path_buf<P: AsRef<Path>>( | |||
|
294 | path: P, | |||
|
295 | ) -> Result<HgPathBuf, HgPathError> { | |||
|
296 | let buf; | |||
|
297 | let os_str = path.as_ref().as_os_str(); | |||
|
298 | #[cfg(unix)] | |||
|
299 | { | |||
|
300 | use std::os::unix::ffi::OsStrExt; | |||
|
301 | buf = HgPathBuf::from_bytes(&os_str.as_bytes()); | |||
|
302 | } | |||
|
303 | #[cfg(windows)] | |||
|
304 | { | |||
|
305 | // TODO: convert from WTF8 to Windows MBCS (ANSI encoding). | |||
|
306 | unimplemented!(); | |||
|
307 | } | |||
|
308 | buf.check_state()?; | |||
|
309 | Ok(buf) | |||
|
310 | } | |||
|
311 | ||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// `check_state` reports each kind of malformed path and accepts
    /// well-formed ones.
    #[test]
    fn test_path_states() {
        assert_eq!(
            HgPath::new(b"/").check_state(),
            Err(HgPathError::LeadingSlash(b"/".to_vec()))
        );
        assert_eq!(
            HgPath::new(b"a/b//c").check_state(),
            Err(HgPathError::ConsecutiveSlashes(b"a/b//c".to_vec(), 4))
        );
        assert_eq!(
            HgPath::new(b"a/b/\0c").check_state(),
            Err(HgPathError::ContainsNullByte(b"a/b/\0c".to_vec(), 4))
        );
        // TODO test HgPathError::DecodeError for the Windows implementation.
        assert!(HgPath::new(b"").is_valid());
        assert!(HgPath::new(b"a/b/c").is_valid());
        // Backslashes in paths are not significant, but allowed
        assert!(HgPath::new(br"a\b/c").is_valid());
        // Dots in paths are not significant, but allowed
        assert!(HgPath::new(b"a/b/../c/").is_valid());
        assert!(HgPath::new(b"./a/b/../c/").is_valid());
    }

    /// `bytes()` can be consumed from both ends.
    #[test]
    fn test_iter() {
        // Single byte, taken from the front.
        let path = HgPath::new(b"a");
        let mut bytes = path.bytes();
        assert_eq!(bytes.next(), Some(&b'a'));
        assert_eq!(bytes.next_back(), None);
        assert_eq!(bytes.next(), None);

        // Single byte, taken from the back.
        let path = HgPath::new(b"a");
        let mut bytes = path.bytes();
        assert_eq!(bytes.next_back(), Some(&b'a'));
        assert_eq!(bytes.next_back(), None);
        assert_eq!(bytes.next(), None);

        // Mixed front and back consumption.
        let path = HgPath::new(b"abc");
        let mut bytes = path.bytes();
        assert_eq!(bytes.next(), Some(&b'a'));
        assert_eq!(bytes.next_back(), Some(&b'c'));
        assert_eq!(bytes.next_back(), Some(&b'b'));
        assert_eq!(bytes.next_back(), None);
        assert_eq!(bytes.next(), None);

        // Front-only consumption exhausts the back as well.
        let path = HgPath::new(b"abc");
        let mut bytes = path.bytes();
        assert_eq!(bytes.next(), Some(&b'a'));
        assert_eq!(bytes.next(), Some(&b'b'));
        assert_eq!(bytes.next(), Some(&b'c'));
        assert_eq!(bytes.next_back(), None);
        assert_eq!(bytes.next(), None);

        // The iterator can feed a collection.
        let path = HgPath::new(b"abc");
        let mut collected = Vec::new();
        collected.extend(path.bytes());
        assert_eq!(collected, vec![b'a', b'b', b'c']);

        // Searching from the back.
        let path = HgPath::new(b"abc");
        let mut bytes = path.bytes();
        assert_eq!(bytes.rposition(|c| *c == b'c'), Some(2));

        let path = HgPath::new(b"abc");
        let mut bytes = path.bytes();
        assert_eq!(bytes.rposition(|c| *c == b'd'), None);
    }

    /// `join` inserts a separator only when one is needed.
    #[test]
    fn test_join() {
        let joined = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
        assert_eq!(b"a/b", joined.as_bytes());

        let joined = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
        assert_eq!(b"a/b/c", joined.as_bytes());

        // No leading slash if empty before join
        let joined = HgPathBuf::new().join(HgPath::new(b"b/c"));
        assert_eq!(b"b/c", joined.as_bytes());

        // The leading slash is an invalid representation of an `HgPath`, but
        // it can happen. This creates another invalid representation of
        // consecutive slashes.
        // TODO What should be done in this case? Should we silently remove
        // the extra slash? Should we change the signature to a problematic
        // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
        // let the error happen upon filesystem interaction?
        let joined = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", joined.as_bytes());
        let joined = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", joined.as_bytes());
    }
}
@@ -8,6 +8,7 b'' | |||||
8 | //! Contains useful functions, traits, structs, etc. for use in core. |
|
8 | //! Contains useful functions, traits, structs, etc. for use in core. | |
9 |
|
9 | |||
10 | pub mod files; |
|
10 | pub mod files; | |
|
11 | pub mod hg_path; | |||
11 |
|
12 | |||
12 | /// Replaces the `from` slice with the `to` slice inside the `buf` slice. |
|
13 | /// Replaces the `from` slice with the `to` slice inside the `buf` slice. | |
13 | /// |
|
14 | /// |
General Comments 0
You need to be logged in to leave comments.
Login now