##// END OF EJS Templates
rust-hgpath: add HgPath and HgPathBuf structs to encapsulate handling of paths...
Raphaël Gomès -
r43226:3fe40dd6 default
parent child Browse files
Show More
@@ -0,0 +1,409 b''
1 // hg_path.rs
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 use std::borrow::Borrow;
9 use std::ffi::{OsStr, OsString};
10 use std::ops::Deref;
11 use std::path::{Path, PathBuf};
12
/// Reasons why a sequence of bytes is rejected by `HgPath::check_state`.
///
/// Every variant carries a copy of the offending path's bytes so that the
/// rendered message can show the whole path.
#[derive(Debug, Eq, PartialEq)]
pub enum HgPathError {
    /// The path starts with `/`. Holds the bytes of the invalid `HgPath`.
    LeadingSlash(Vec<u8>),
    /// The path contains `//`. Holds the bytes of the invalid `HgPath` and
    /// the index of the second slash.
    ConsecutiveSlashes(Vec<u8>, usize),
    /// The path contains a null byte. Holds the bytes of the invalid `HgPath`
    /// and the index of the null byte.
    ContainsNullByte(Vec<u8>, usize),
    /// The path could not be decoded. Holds the bytes of the invalid
    /// `HgPath`.
    DecodeError(Vec<u8>),
}

// Implementing `Display` (rather than `ToString` directly) keeps
// `to_string()` available through the standard blanket impl and also lets the
// error be used with `{}` in format strings.
impl std::fmt::Display for HgPathError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            HgPathError::LeadingSlash(bytes) => {
                write!(f, "Invalid HgPath '{:?}': has a leading slash.", bytes)
            }
            HgPathError::ConsecutiveSlashes(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
                bytes, pos
            ),
            HgPathError::ContainsNullByte(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': contains null byte at pos {}.",
                bytes, pos
            ),
            HgPathError::DecodeError(bytes) => {
                write!(f, "Invalid HgPath '{:?}': could not be decoded.", bytes)
            }
        }
    }
}
45
46 impl From<HgPathError> for std::io::Error {
47 fn from(e: HgPathError) -> Self {
48 std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
49 }
50 }
51
/// This is a repository-relative path (or canonical path):
///
/// - no null characters
/// - `/` separates directories
/// - no consecutive slashes
/// - no leading slash,
/// - no `.` nor `..` of special meaning
/// - stored in repository and shared across platforms
///
/// Note: there is no guarantee of any `HgPath` being well-formed at any point
/// in its lifetime for performance reasons and to ease ergonomics. It is
/// however checked using the `check_state` method before any file-system
/// operation.
///
/// This allows us to be encoding-transparent as much as possible, until really
/// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
/// or `Path`) whenever more complex operations are needed:
/// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
/// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
/// character encoding will be determined on a per-repository basis.
//
// `HgPath::new()` casts a `*const [u8]` to a `*const HgPath`, which is only
// sound if `HgPath` is guaranteed to have the same layout as `[u8]`.
// `#[repr(transparent)]` provides that guarantee; do not remove it and do not
// add other non-zero-sized fields to this struct.
#[derive(Eq, Ord, PartialEq, PartialOrd, Debug, Hash)]
#[repr(transparent)]
pub struct HgPath {
    inner: [u8],
}
83
84 impl HgPath {
85 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
86 unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
87 }
88 pub fn is_empty(&self) -> bool {
89 self.inner.is_empty()
90 }
91 pub fn len(&self) -> usize {
92 self.inner.len()
93 }
94 fn to_hg_path_buf(&self) -> HgPathBuf {
95 HgPathBuf {
96 inner: self.inner.to_owned(),
97 }
98 }
99 pub fn bytes(&self) -> std::slice::Iter<u8> {
100 self.inner.iter()
101 }
102 pub fn to_ascii_uppercase(&self) -> HgPathBuf {
103 HgPathBuf::from(self.inner.to_ascii_uppercase())
104 }
105 pub fn to_ascii_lowercase(&self) -> HgPathBuf {
106 HgPathBuf::from(self.inner.to_ascii_lowercase())
107 }
108 pub fn as_bytes(&self) -> &[u8] {
109 &self.inner
110 }
111 pub fn contains(&self, other: u8) -> bool {
112 self.inner.contains(&other)
113 }
114 pub fn join<T: ?Sized + AsRef<HgPath>>(&self, other: &T) -> HgPathBuf {
115 let mut inner = self.inner.to_owned();
116 if inner.len() != 0 && inner.last() != Some(&b'/') {
117 inner.push(b'/');
118 }
119 inner.extend(other.as_ref().bytes());
120 HgPathBuf::from_bytes(&inner)
121 }
122 /// Checks for errors in the path, short-circuiting at the first one.
123 /// This generates fine-grained errors useful for debugging.
124 /// To simply check if the path is valid during tests, use `is_valid`.
125 pub fn check_state(&self) -> Result<(), HgPathError> {
126 if self.len() == 0 {
127 return Ok(());
128 }
129 let bytes = self.as_bytes();
130 let mut previous_byte = None;
131
132 if bytes[0] == b'/' {
133 return Err(HgPathError::LeadingSlash(bytes.to_vec()));
134 }
135 for (index, byte) in bytes.iter().enumerate() {
136 match byte {
137 0 => {
138 return Err(HgPathError::ContainsNullByte(
139 bytes.to_vec(),
140 index,
141 ))
142 }
143 b'/' => {
144 if previous_byte.is_some() && previous_byte == Some(b'/') {
145 return Err(HgPathError::ConsecutiveSlashes(
146 bytes.to_vec(),
147 index,
148 ));
149 }
150 }
151 _ => (),
152 };
153 previous_byte = Some(*byte);
154 }
155 Ok(())
156 }
157
158 #[cfg(test)]
159 /// Only usable during tests to force developers to handle invalid states
160 fn is_valid(&self) -> bool {
161 self.check_state().is_ok()
162 }
163 }
164
/// Owned, growable buffer of path bytes; dereferences to `HgPath`.
///
/// The default value is the empty path, same as `HgPathBuf::new()`.
#[derive(Default, Eq, Ord, Clone, PartialEq, PartialOrd, Debug, Hash)]
pub struct HgPathBuf {
    inner: Vec<u8>,
}
169
170 impl HgPathBuf {
171 pub fn new() -> Self {
172 Self { inner: Vec::new() }
173 }
174 pub fn push(&mut self, byte: u8) {
175 self.inner.push(byte);
176 }
177 pub fn from_bytes(s: &[u8]) -> HgPathBuf {
178 HgPath::new(s).to_owned()
179 }
180 pub fn into_vec(self) -> Vec<u8> {
181 self.inner
182 }
183 pub fn as_ref(&self) -> &[u8] {
184 self.inner.as_ref()
185 }
186 }
187
188 impl Deref for HgPathBuf {
189 type Target = HgPath;
190
191 #[inline]
192 fn deref(&self) -> &HgPath {
193 &HgPath::new(&self.inner)
194 }
195 }
196
197 impl From<Vec<u8>> for HgPathBuf {
198 fn from(vec: Vec<u8>) -> Self {
199 Self { inner: vec }
200 }
201 }
202
203 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
204 fn from(s: &T) -> HgPathBuf {
205 s.as_ref().to_owned()
206 }
207 }
208
209 impl Into<Vec<u8>> for HgPathBuf {
210 fn into(self) -> Vec<u8> {
211 self.inner
212 }
213 }
214
215 impl Borrow<HgPath> for HgPathBuf {
216 fn borrow(&self) -> &HgPath {
217 &HgPath::new(self.as_bytes())
218 }
219 }
220
221 impl ToOwned for HgPath {
222 type Owned = HgPathBuf;
223
224 fn to_owned(&self) -> HgPathBuf {
225 self.to_hg_path_buf()
226 }
227 }
228
229 impl AsRef<HgPath> for HgPath {
230 fn as_ref(&self) -> &HgPath {
231 self
232 }
233 }
234
235 impl AsRef<HgPath> for HgPathBuf {
236 fn as_ref(&self) -> &HgPath {
237 self
238 }
239 }
240
241 impl Extend<u8> for HgPathBuf {
242 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
243 self.inner.extend(iter);
244 }
245 }
246
247 /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
248 /// implemented, these conversion utils will have to work differently depending
249 /// on the repository encoding: either `UTF-8` or `MBCS`.
250
251 pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
252 hg_path: P,
253 ) -> Result<OsString, HgPathError> {
254 hg_path.as_ref().check_state()?;
255 let os_str;
256 #[cfg(unix)]
257 {
258 use std::os::unix::ffi::OsStrExt;
259 os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes());
260 }
261 #[cfg(windows)]
262 {
263 // TODO: convert from Windows MBCS (ANSI encoding) to WTF8.
264 unimplemented!();
265 }
266 Ok(os_str.to_os_string())
267 }
268
269 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
270 hg_path: P,
271 ) -> Result<PathBuf, HgPathError> {
272 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
273 }
274
275 pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
276 os_string: S,
277 ) -> Result<HgPathBuf, HgPathError> {
278 let buf;
279 #[cfg(unix)]
280 {
281 use std::os::unix::ffi::OsStrExt;
282 buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes());
283 }
284 #[cfg(windows)]
285 {
286 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
287 unimplemented!();
288 }
289 buf.check_state()?;
290 Ok(buf)
291 }
292
293 pub fn path_to_hg_path_buf<P: AsRef<Path>>(
294 path: P,
295 ) -> Result<HgPathBuf, HgPathError> {
296 let buf;
297 let os_str = path.as_ref().as_os_str();
298 #[cfg(unix)]
299 {
300 use std::os::unix::ffi::OsStrExt;
301 buf = HgPathBuf::from_bytes(&os_str.as_bytes());
302 }
303 #[cfg(windows)]
304 {
305 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
306 unimplemented!();
307 }
308 buf.check_state()?;
309 Ok(buf)
310 }
311
#[cfg(test)]
mod tests {
    use super::*;

    /// `check_state` reports the expected error for each kind of malformed
    /// path and accepts well-formed ones.
    #[test]
    fn test_path_states() {
        let leading = HgPath::new(b"/");
        assert_eq!(
            Err(HgPathError::LeadingSlash(b"/".to_vec())),
            leading.check_state()
        );

        let doubled = HgPath::new(b"a/b//c");
        assert_eq!(
            Err(HgPathError::ConsecutiveSlashes(b"a/b//c".to_vec(), 4)),
            doubled.check_state()
        );

        let with_null = HgPath::new(b"a/b/\0c");
        assert_eq!(
            Err(HgPathError::ContainsNullByte(b"a/b/\0c".to_vec(), 4)),
            with_null.check_state()
        );

        // TODO test HgPathError::DecodeError for the Windows implementation.
        assert!(HgPath::new(b"").is_valid());
        assert!(HgPath::new(b"a/b/c").is_valid());
        // Backslashes in paths are not significant, but allowed
        assert!(HgPath::new(br"a\b/c").is_valid());
        // Dots in paths are not significant, but allowed
        assert!(HgPath::new(b"a/b/../c/").is_valid());
        assert!(HgPath::new(b"./a/b/../c/").is_valid());
    }

    /// The iterator returned by `bytes` can be consumed from both ends.
    #[test]
    fn test_iter() {
        // Single byte, taken from the front.
        let mut it = HgPath::new(b"a").bytes();
        assert_eq!(Some(&b'a'), it.next());
        assert_eq!(None, it.next_back());
        assert_eq!(None, it.next());

        // Single byte, taken from the back.
        let mut it = HgPath::new(b"a").bytes();
        assert_eq!(Some(&b'a'), it.next_back());
        assert_eq!(None, it.next_back());
        assert_eq!(None, it.next());

        // Several bytes, mixing both ends.
        let mut it = HgPath::new(b"abc").bytes();
        assert_eq!(Some(&b'a'), it.next());
        assert_eq!(Some(&b'c'), it.next_back());
        assert_eq!(Some(&b'b'), it.next_back());
        assert_eq!(None, it.next_back());
        assert_eq!(None, it.next());

        // Several bytes, front only.
        let mut it = HgPath::new(b"abc").bytes();
        assert_eq!(Some(&b'a'), it.next());
        assert_eq!(Some(&b'b'), it.next());
        assert_eq!(Some(&b'c'), it.next());
        assert_eq!(None, it.next_back());
        assert_eq!(None, it.next());

        // The iterator can feed `Extend`.
        let mut collected: Vec<u8> = Vec::new();
        collected.extend(HgPath::new(b"abc").bytes());
        assert_eq!(vec![b'a', b'b', b'c'], collected);

        // Searching from the back: present byte.
        let mut it = HgPath::new(b"abc").bytes();
        assert_eq!(Some(2), it.rposition(|&c| c == b'c'));

        // Searching from the back: absent byte.
        let mut it = HgPath::new(b"abc").bytes();
        assert_eq!(None, it.rposition(|&c| c == b'd'));
    }

    /// `join` inserts a separator only when needed and never validates.
    #[test]
    fn test_join() {
        let joined = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
        assert_eq!(b"a/b", joined.as_bytes());

        let joined = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
        assert_eq!(b"a/b/c", joined.as_bytes());

        // No leading slash if empty before join
        let joined = HgPathBuf::new().join(HgPath::new(b"b/c"));
        assert_eq!(b"b/c", joined.as_bytes());

        // The leading slash is an invalid representation of an `HgPath`, but
        // it can happen. This creates another invalid representation of
        // consecutive slashes.
        // TODO What should be done in this case? Should we silently remove
        // the extra slash? Should we change the signature to a problematic
        // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
        // let the error happen upon filesystem interaction?
        let joined = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", joined.as_bytes());
        let joined = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", joined.as_bytes());
    }
}
@@ -8,6 +8,7 b''
8 8 //! Contains useful functions, traits, structs, etc. for use in core.
9 9
10 10 pub mod files;
11 pub mod hg_path;
11 12
12 13 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
13 14 ///
General Comments 0
You need to be logged in to leave comments. Login now