Show More
@@ -0,0 +1,409 b'' | |||
|
1 | // hg_path.rs | |
|
2 | // | |
|
3 | // Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | |
|
4 | // | |
|
5 | // This software may be used and distributed according to the terms of the | |
|
6 | // GNU General Public License version 2 or any later version. | |
|
7 | ||
|
8 | use std::borrow::Borrow; | |
|
9 | use std::ffi::{OsStr, OsString}; | |
|
10 | use std::ops::Deref; | |
|
11 | use std::path::{Path, PathBuf}; | |
|
12 | ||
|
/// Reasons why a byte sequence is not a well-formed `HgPath`.
///
/// Each variant owns the bytes of the offending path so the error can be
/// reported independently of the path it was produced from.
#[derive(Debug, Eq, PartialEq)]
pub enum HgPathError {
    /// Bytes from the invalid `HgPath`
    LeadingSlash(Vec<u8>),
    /// Bytes and index of the second slash
    ConsecutiveSlashes(Vec<u8>, usize),
    /// Bytes and index of the null byte
    ContainsNullByte(Vec<u8>, usize),
    /// Bytes
    DecodeError(Vec<u8>),
}

/// Human-readable rendering of the error.
///
/// This is a `Display` impl rather than a hand-written `ToString`: the
/// standard blanket impl still provides `to_string()`, and the error can
/// additionally be used with `{}` in format strings. The path bytes are
/// printed with their `Debug` representation since they may not be UTF-8.
impl std::fmt::Display for HgPathError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            HgPathError::LeadingSlash(bytes) => {
                write!(f, "Invalid HgPath '{:?}': has a leading slash.", bytes)
            }
            HgPathError::ConsecutiveSlashes(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
                bytes, pos
            ),
            HgPathError::ContainsNullByte(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': contains null byte at pos {}.",
                bytes, pos
            ),
            HgPathError::DecodeError(bytes) => {
                write!(f, "Invalid HgPath '{:?}': could not be decoded.", bytes)
            }
        }
    }
}
|
45 | ||
|
46 | impl From<HgPathError> for std::io::Error { | |
|
47 | fn from(e: HgPathError) -> Self { | |
|
48 | std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()) | |
|
49 | } | |
|
50 | } | |
|
51 | ||
|
/// This is a repository-relative path (or canonical path):
///     - no null characters
///     - `/` separates directories
///     - no consecutive slashes
///     - no leading slash,
///     - no `.` nor `..` of special meaning
///     - stored in repository and shared across platforms
///
/// Note: there is no guarantee of any `HgPath` being well-formed at any point
/// in its lifetime for performance reasons and to ease ergonomics. It is
/// however checked using the `check_state` method before any file-system
/// operation.
///
/// This allows us to be encoding-transparent as much as possible, until really
/// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
/// or `Path`) whenever more complex operations are needed:
/// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
/// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
/// character encoding will be determined on a per-repository basis.
//
// `HgPath::new()` reinterprets a `&[u8]` as a `&HgPath`, which is only
// justified if `HgPath` is layout-compatible with `[u8]`.
// `#[repr(transparent)]` makes that guarantee explicit instead of relying on
// the unspecified default struct layout.
#[derive(Eq, Ord, PartialEq, PartialOrd, Debug, Hash)]
#[repr(transparent)]
pub struct HgPath {
    inner: [u8],
}
|
83 | ||
|
84 | impl HgPath { | |
|
85 | pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self { | |
|
86 | unsafe { &*(s.as_ref() as *const [u8] as *const Self) } | |
|
87 | } | |
|
88 | pub fn is_empty(&self) -> bool { | |
|
89 | self.inner.is_empty() | |
|
90 | } | |
|
91 | pub fn len(&self) -> usize { | |
|
92 | self.inner.len() | |
|
93 | } | |
|
94 | fn to_hg_path_buf(&self) -> HgPathBuf { | |
|
95 | HgPathBuf { | |
|
96 | inner: self.inner.to_owned(), | |
|
97 | } | |
|
98 | } | |
|
99 | pub fn bytes(&self) -> std::slice::Iter<u8> { | |
|
100 | self.inner.iter() | |
|
101 | } | |
|
102 | pub fn to_ascii_uppercase(&self) -> HgPathBuf { | |
|
103 | HgPathBuf::from(self.inner.to_ascii_uppercase()) | |
|
104 | } | |
|
105 | pub fn to_ascii_lowercase(&self) -> HgPathBuf { | |
|
106 | HgPathBuf::from(self.inner.to_ascii_lowercase()) | |
|
107 | } | |
|
108 | pub fn as_bytes(&self) -> &[u8] { | |
|
109 | &self.inner | |
|
110 | } | |
|
111 | pub fn contains(&self, other: u8) -> bool { | |
|
112 | self.inner.contains(&other) | |
|
113 | } | |
|
114 | pub fn join<T: ?Sized + AsRef<HgPath>>(&self, other: &T) -> HgPathBuf { | |
|
115 | let mut inner = self.inner.to_owned(); | |
|
116 | if inner.len() != 0 && inner.last() != Some(&b'/') { | |
|
117 | inner.push(b'/'); | |
|
118 | } | |
|
119 | inner.extend(other.as_ref().bytes()); | |
|
120 | HgPathBuf::from_bytes(&inner) | |
|
121 | } | |
|
122 | /// Checks for errors in the path, short-circuiting at the first one. | |
|
123 | /// This generates fine-grained errors useful for debugging. | |
|
124 | /// To simply check if the path is valid during tests, use `is_valid`. | |
|
125 | pub fn check_state(&self) -> Result<(), HgPathError> { | |
|
126 | if self.len() == 0 { | |
|
127 | return Ok(()); | |
|
128 | } | |
|
129 | let bytes = self.as_bytes(); | |
|
130 | let mut previous_byte = None; | |
|
131 | ||
|
132 | if bytes[0] == b'/' { | |
|
133 | return Err(HgPathError::LeadingSlash(bytes.to_vec())); | |
|
134 | } | |
|
135 | for (index, byte) in bytes.iter().enumerate() { | |
|
136 | match byte { | |
|
137 | 0 => { | |
|
138 | return Err(HgPathError::ContainsNullByte( | |
|
139 | bytes.to_vec(), | |
|
140 | index, | |
|
141 | )) | |
|
142 | } | |
|
143 | b'/' => { | |
|
144 | if previous_byte.is_some() && previous_byte == Some(b'/') { | |
|
145 | return Err(HgPathError::ConsecutiveSlashes( | |
|
146 | bytes.to_vec(), | |
|
147 | index, | |
|
148 | )); | |
|
149 | } | |
|
150 | } | |
|
151 | _ => (), | |
|
152 | }; | |
|
153 | previous_byte = Some(*byte); | |
|
154 | } | |
|
155 | Ok(()) | |
|
156 | } | |
|
157 | ||
|
158 | #[cfg(test)] | |
|
159 | /// Only usable during tests to force developers to handle invalid states | |
|
160 | fn is_valid(&self) -> bool { | |
|
161 | self.check_state().is_ok() | |
|
162 | } | |
|
163 | } | |
|
164 | ||
|
/// Owned, growable counterpart of `HgPath`, backed by a `Vec<u8>`.
///
/// `Default` yields the empty path, the same value `HgPathBuf::new()` builds.
#[derive(Default, Eq, Ord, Clone, PartialEq, PartialOrd, Debug, Hash)]
pub struct HgPathBuf {
    inner: Vec<u8>,
}
|
169 | ||
|
170 | impl HgPathBuf { | |
|
171 | pub fn new() -> Self { | |
|
172 | Self { inner: Vec::new() } | |
|
173 | } | |
|
174 | pub fn push(&mut self, byte: u8) { | |
|
175 | self.inner.push(byte); | |
|
176 | } | |
|
177 | pub fn from_bytes(s: &[u8]) -> HgPathBuf { | |
|
178 | HgPath::new(s).to_owned() | |
|
179 | } | |
|
180 | pub fn into_vec(self) -> Vec<u8> { | |
|
181 | self.inner | |
|
182 | } | |
|
183 | pub fn as_ref(&self) -> &[u8] { | |
|
184 | self.inner.as_ref() | |
|
185 | } | |
|
186 | } | |
|
187 | ||
|
188 | impl Deref for HgPathBuf { | |
|
189 | type Target = HgPath; | |
|
190 | ||
|
191 | #[inline] | |
|
192 | fn deref(&self) -> &HgPath { | |
|
193 | &HgPath::new(&self.inner) | |
|
194 | } | |
|
195 | } | |
|
196 | ||
|
197 | impl From<Vec<u8>> for HgPathBuf { | |
|
198 | fn from(vec: Vec<u8>) -> Self { | |
|
199 | Self { inner: vec } | |
|
200 | } | |
|
201 | } | |
|
202 | ||
|
203 | impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf { | |
|
204 | fn from(s: &T) -> HgPathBuf { | |
|
205 | s.as_ref().to_owned() | |
|
206 | } | |
|
207 | } | |
|
208 | ||
|
209 | impl Into<Vec<u8>> for HgPathBuf { | |
|
210 | fn into(self) -> Vec<u8> { | |
|
211 | self.inner | |
|
212 | } | |
|
213 | } | |
|
214 | ||
|
215 | impl Borrow<HgPath> for HgPathBuf { | |
|
216 | fn borrow(&self) -> &HgPath { | |
|
217 | &HgPath::new(self.as_bytes()) | |
|
218 | } | |
|
219 | } | |
|
220 | ||
|
221 | impl ToOwned for HgPath { | |
|
222 | type Owned = HgPathBuf; | |
|
223 | ||
|
224 | fn to_owned(&self) -> HgPathBuf { | |
|
225 | self.to_hg_path_buf() | |
|
226 | } | |
|
227 | } | |
|
228 | ||
|
229 | impl AsRef<HgPath> for HgPath { | |
|
230 | fn as_ref(&self) -> &HgPath { | |
|
231 | self | |
|
232 | } | |
|
233 | } | |
|
234 | ||
|
235 | impl AsRef<HgPath> for HgPathBuf { | |
|
236 | fn as_ref(&self) -> &HgPath { | |
|
237 | self | |
|
238 | } | |
|
239 | } | |
|
240 | ||
|
241 | impl Extend<u8> for HgPathBuf { | |
|
242 | fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) { | |
|
243 | self.inner.extend(iter); | |
|
244 | } | |
|
245 | } | |
|
246 | ||
|
247 | /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is | |
|
248 | /// implemented, these conversion utils will have to work differently depending | |
|
249 | /// on the repository encoding: either `UTF-8` or `MBCS`. | |
|
250 | ||
|
251 | pub fn hg_path_to_os_string<P: AsRef<HgPath>>( | |
|
252 | hg_path: P, | |
|
253 | ) -> Result<OsString, HgPathError> { | |
|
254 | hg_path.as_ref().check_state()?; | |
|
255 | let os_str; | |
|
256 | #[cfg(unix)] | |
|
257 | { | |
|
258 | use std::os::unix::ffi::OsStrExt; | |
|
259 | os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes()); | |
|
260 | } | |
|
261 | #[cfg(windows)] | |
|
262 | { | |
|
263 | // TODO: convert from Windows MBCS (ANSI encoding) to WTF8. | |
|
264 | unimplemented!(); | |
|
265 | } | |
|
266 | Ok(os_str.to_os_string()) | |
|
267 | } | |
|
268 | ||
|
269 | pub fn hg_path_to_path_buf<P: AsRef<HgPath>>( | |
|
270 | hg_path: P, | |
|
271 | ) -> Result<PathBuf, HgPathError> { | |
|
272 | Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf()) | |
|
273 | } | |
|
274 | ||
|
275 | pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>( | |
|
276 | os_string: S, | |
|
277 | ) -> Result<HgPathBuf, HgPathError> { | |
|
278 | let buf; | |
|
279 | #[cfg(unix)] | |
|
280 | { | |
|
281 | use std::os::unix::ffi::OsStrExt; | |
|
282 | buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes()); | |
|
283 | } | |
|
284 | #[cfg(windows)] | |
|
285 | { | |
|
286 | // TODO: convert from WTF8 to Windows MBCS (ANSI encoding). | |
|
287 | unimplemented!(); | |
|
288 | } | |
|
289 | buf.check_state()?; | |
|
290 | Ok(buf) | |
|
291 | } | |
|
292 | ||
|
293 | pub fn path_to_hg_path_buf<P: AsRef<Path>>( | |
|
294 | path: P, | |
|
295 | ) -> Result<HgPathBuf, HgPathError> { | |
|
296 | let buf; | |
|
297 | let os_str = path.as_ref().as_os_str(); | |
|
298 | #[cfg(unix)] | |
|
299 | { | |
|
300 | use std::os::unix::ffi::OsStrExt; | |
|
301 | buf = HgPathBuf::from_bytes(&os_str.as_bytes()); | |
|
302 | } | |
|
303 | #[cfg(windows)] | |
|
304 | { | |
|
305 | // TODO: convert from WTF8 to Windows MBCS (ANSI encoding). | |
|
306 | unimplemented!(); | |
|
307 | } | |
|
308 | buf.check_state()?; | |
|
309 | Ok(buf) | |
|
310 | } | |
|
311 | ||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// `check_state` reports each kind of malformed path and accepts
    /// well-formed (or merely unusual) ones.
    #[test]
    fn test_path_states() {
        let leading_slash = HgPath::new(b"/").check_state();
        assert_eq!(
            Err(HgPathError::LeadingSlash(b"/".to_vec())),
            leading_slash
        );

        let double_slash = HgPath::new(b"a/b//c").check_state();
        assert_eq!(
            Err(HgPathError::ConsecutiveSlashes(b"a/b//c".to_vec(), 4)),
            double_slash
        );

        let null_byte = HgPath::new(b"a/b/\0c").check_state();
        assert_eq!(
            Err(HgPathError::ContainsNullByte(b"a/b/\0c".to_vec(), 4)),
            null_byte
        );

        // TODO test HgPathError::DecodeError for the Windows implementation.
        assert!(HgPath::new(b"").is_valid());
        assert!(HgPath::new(b"a/b/c").is_valid());
        // Backslashes in paths are not significant, but allowed
        assert!(HgPath::new(br"a\b/c").is_valid());
        // Dots in paths are not significant, but allowed
        assert!(HgPath::new(b"a/b/../c/").is_valid());
        assert!(HgPath::new(b"./a/b/../c/").is_valid());
    }

    /// `bytes()` behaves as a double-ended, exact-size iterator over the
    /// path's bytes.
    #[test]
    fn test_iter() {
        // Single byte, consumed from the front.
        let single = HgPath::new(b"a");
        let mut it = single.bytes();
        assert_eq!(Some(&b'a'), it.next());
        assert_eq!(None, it.next_back());
        assert_eq!(None, it.next());

        // Single byte, consumed from the back.
        let single = HgPath::new(b"a");
        let mut it = single.bytes();
        assert_eq!(Some(&b'a'), it.next_back());
        assert_eq!(None, it.next_back());
        assert_eq!(None, it.next());

        // Several bytes, consumed from both ends.
        let abc = HgPath::new(b"abc");
        let mut it = abc.bytes();
        assert_eq!(Some(&b'a'), it.next());
        assert_eq!(Some(&b'c'), it.next_back());
        assert_eq!(Some(&b'b'), it.next_back());
        assert_eq!(None, it.next_back());
        assert_eq!(None, it.next());

        // Several bytes, consumed from the front only.
        let abc = HgPath::new(b"abc");
        let mut it = abc.bytes();
        assert_eq!(Some(&b'a'), it.next());
        assert_eq!(Some(&b'b'), it.next());
        assert_eq!(Some(&b'c'), it.next());
        assert_eq!(None, it.next_back());
        assert_eq!(None, it.next());

        // The iterator can feed `Extend`.
        let abc = HgPath::new(b"abc");
        let mut collected = Vec::new();
        collected.extend(abc.bytes());
        assert_eq!(vec![b'a', b'b', b'c'], collected);

        // Searching from the back: present byte.
        let abc = HgPath::new(b"abc");
        let mut it = abc.bytes();
        assert_eq!(Some(2), it.rposition(|c| *c == b'c'));

        // Searching from the back: absent byte.
        let abc = HgPath::new(b"abc");
        let mut it = abc.bytes();
        assert_eq!(None, it.rposition(|c| *c == b'd'));
    }

    /// `join` inserts a separator only when one is needed and never
    /// validates its result.
    #[test]
    fn test_join() {
        let joined = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
        assert_eq!(b"a/b", joined.as_bytes());

        let joined = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
        assert_eq!(b"a/b/c", joined.as_bytes());

        // No leading slash if empty before join
        let joined = HgPathBuf::new().join(HgPath::new(b"b/c"));
        assert_eq!(b"b/c", joined.as_bytes());

        // The leading slash is an invalid representation of an `HgPath`, but
        // it can happen. This creates another invalid representation of
        // consecutive slashes.
        // TODO What should be done in this case? Should we silently remove
        // the extra slash? Should we change the signature to a problematic
        // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
        // let the error happen upon filesystem interaction?
        let joined = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", joined.as_bytes());
        let joined = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", joined.as_bytes());
    }
}
General Comments 0
You need to be logged in to leave comments.
Login now