##// END OF EJS Templates
rust-hg-path: add method to get part of a path relative to a prefix...
Raphaël Gomès -
r44285:4f1543a2 default
parent child Browse files
Show More
@@ -1,415 +1,464 b''
1 1 // hg_path.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use std::borrow::Borrow;
9 9 use std::ffi::{OsStr, OsString};
10 10 use std::fmt;
11 11 use std::ops::Deref;
12 12 use std::path::{Path, PathBuf};
13 13
/// Reasons why a sequence of bytes is rejected as a well-formed `HgPath`.
///
/// Every variant owns a copy of the offending bytes, so the error can be
/// reported independently of the path it was produced from.
#[derive(Debug, Eq, PartialEq)]
pub enum HgPathError {
    /// Bytes from the invalid `HgPath`
    LeadingSlash(Vec<u8>),
    /// Bytes and index of the second slash
    ConsecutiveSlashes(Vec<u8>, usize),
    /// Bytes and index of the null byte
    ContainsNullByte(Vec<u8>, usize),
    /// Bytes
    DecodeError(Vec<u8>),
}

// Implementing `Display` (rather than `ToString` directly) keeps
// `to_string()` available through the standard blanket impl and additionally
// lets the error be used with `{}` in format strings.
impl fmt::Display for HgPathError {
    /// Writes a human-readable description of the error. The path bytes are
    /// rendered with their `Debug` representation since they are not
    /// guaranteed to be valid UTF-8.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            HgPathError::LeadingSlash(bytes) => {
                write!(f, "Invalid HgPath '{:?}': has a leading slash.", bytes)
            }
            HgPathError::ConsecutiveSlashes(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
                bytes, pos
            ),
            HgPathError::ContainsNullByte(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': contains null byte at pos {}.",
                bytes, pos
            ),
            HgPathError::DecodeError(bytes) => {
                write!(f, "Invalid HgPath '{:?}': could not be decoded.", bytes)
            }
        }
    }
}
46 46
47 47 impl From<HgPathError> for std::io::Error {
48 48 fn from(e: HgPathError) -> Self {
49 49 std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
50 50 }
51 51 }
52 52
/// This is a repository-relative path (or canonical path):
///     - no null characters
///     - `/` separates directories
///     - no consecutive slashes
///     - no leading slash,
///     - no `.` nor `..` of special meaning
///     - stored in repository and shared across platforms
///
/// Note: there is no guarantee of any `HgPath` being well-formed at any point
/// in its lifetime for performance reasons and to ease ergonomics. It is
/// however checked using the `check_state` method before any file-system
/// operation.
///
/// This allows us to be encoding-transparent as much as possible, until really
/// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
/// or `Path`) whenever more complex operations are needed:
/// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
/// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
/// character encoding will be determined on a per-repository basis.
//
// `HgPath::new()` reinterprets a `&[u8]` as a `&HgPath` through a raw pointer
// cast, which is only sound if `HgPath` has exactly the same layout as its
// single `[u8]` field. `#[repr(transparent)]` is what guarantees this, so it
// must not be removed and no other non-zero-sized field may be added.
#[derive(Eq, Ord, PartialEq, PartialOrd, Debug, Hash)]
#[repr(transparent)]
pub struct HgPath {
    inner: [u8],
}
84 84
85 85 impl HgPath {
86 86 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
87 87 unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
88 88 }
89 89 pub fn is_empty(&self) -> bool {
90 90 self.inner.is_empty()
91 91 }
92 92 pub fn len(&self) -> usize {
93 93 self.inner.len()
94 94 }
95 95 fn to_hg_path_buf(&self) -> HgPathBuf {
96 96 HgPathBuf {
97 97 inner: self.inner.to_owned(),
98 98 }
99 99 }
100 100 pub fn bytes(&self) -> std::slice::Iter<u8> {
101 101 self.inner.iter()
102 102 }
103 103 pub fn to_ascii_uppercase(&self) -> HgPathBuf {
104 104 HgPathBuf::from(self.inner.to_ascii_uppercase())
105 105 }
106 106 pub fn to_ascii_lowercase(&self) -> HgPathBuf {
107 107 HgPathBuf::from(self.inner.to_ascii_lowercase())
108 108 }
109 109 pub fn as_bytes(&self) -> &[u8] {
110 110 &self.inner
111 111 }
112 112 pub fn contains(&self, other: u8) -> bool {
113 113 self.inner.contains(&other)
114 114 }
115 pub fn starts_with(&self, needle: impl AsRef<HgPath>) -> bool {
116 self.inner.starts_with(needle.as_ref().as_bytes())
117 }
115 118 pub fn join<T: ?Sized + AsRef<HgPath>>(&self, other: &T) -> HgPathBuf {
116 119 let mut inner = self.inner.to_owned();
117 120 if inner.len() != 0 && inner.last() != Some(&b'/') {
118 121 inner.push(b'/');
119 122 }
120 123 inner.extend(other.as_ref().bytes());
121 124 HgPathBuf::from_bytes(&inner)
122 125 }
126 /// Given a base directory, returns the slice of `self` relative to the
127 /// base directory. If `base` is not a directory (does not end with a
128 /// `b'/'`), returns `None`.
129 pub fn relative_to(&self, base: impl AsRef<HgPath>) -> Option<&HgPath> {
130 let base = base.as_ref();
131 if base.is_empty() {
132 return Some(self);
133 }
134 let is_dir = base.as_bytes().ends_with(b"/");
135 if is_dir && self.starts_with(base) {
136 Some(HgPath::new(&self.inner[base.len()..]))
137 } else {
138 None
139 }
140 }
123 141 /// Checks for errors in the path, short-circuiting at the first one.
124 142 /// This generates fine-grained errors useful for debugging.
125 143 /// To simply check if the path is valid during tests, use `is_valid`.
126 144 pub fn check_state(&self) -> Result<(), HgPathError> {
127 145 if self.len() == 0 {
128 146 return Ok(());
129 147 }
130 148 let bytes = self.as_bytes();
131 149 let mut previous_byte = None;
132 150
133 151 if bytes[0] == b'/' {
134 152 return Err(HgPathError::LeadingSlash(bytes.to_vec()));
135 153 }
136 154 for (index, byte) in bytes.iter().enumerate() {
137 155 match byte {
138 156 0 => {
139 157 return Err(HgPathError::ContainsNullByte(
140 158 bytes.to_vec(),
141 159 index,
142 160 ))
143 161 }
144 162 b'/' => {
145 163 if previous_byte.is_some() && previous_byte == Some(b'/') {
146 164 return Err(HgPathError::ConsecutiveSlashes(
147 165 bytes.to_vec(),
148 166 index,
149 167 ));
150 168 }
151 169 }
152 170 _ => (),
153 171 };
154 172 previous_byte = Some(*byte);
155 173 }
156 174 Ok(())
157 175 }
158 176
159 177 #[cfg(test)]
160 178 /// Only usable during tests to force developers to handle invalid states
161 179 fn is_valid(&self) -> bool {
162 180 self.check_state().is_ok()
163 181 }
164 182 }
165 183
166 184 impl fmt::Display for HgPath {
167 185 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
168 186 write!(f, "{}", String::from_utf8_lossy(&self.inner))
169 187 }
170 188 }
171 189
/// Owned, growable counterpart of `HgPath`, backed by a `Vec<u8>`.
///
/// Like `HgPath`, the bytes are not validated when the buffer is built or
/// modified.
#[derive(Default, Eq, Ord, Clone, PartialEq, PartialOrd, Debug, Hash)]
pub struct HgPathBuf {
    inner: Vec<u8>,
}

impl HgPathBuf {
    /// Creates an empty path buffer. Equivalent to `HgPathBuf::default()`.
    pub fn new() -> Self {
        Self { inner: Vec::new() }
    }
    /// Appends a single byte at the end of the path.
    pub fn push(&mut self, byte: u8) {
        self.inner.push(byte);
    }
    /// Creates a path buffer holding a copy of `s`.
    pub fn from_bytes(s: &[u8]) -> HgPathBuf {
        Self { inner: s.to_vec() }
    }
    /// Consumes the buffer and returns the underlying bytes.
    pub fn into_vec(self) -> Vec<u8> {
        self.inner
    }
    /// Returns the raw bytes of the path.
    ///
    /// Being an inherent method, this is what `buf.as_ref()` resolves to in
    /// method-call syntax, ahead of any `AsRef` trait implementation.
    pub fn as_ref(&self) -> &[u8] {
        self.inner.as_ref()
    }
}

impl fmt::Display for HgPathBuf {
    /// Writes the path as text; byte sequences that are not valid UTF-8 are
    /// replaced by U+FFFD, so the output is lossy and meant for display only.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&String::from_utf8_lossy(&self.inner))
    }
}
200 218
201 219 impl Deref for HgPathBuf {
202 220 type Target = HgPath;
203 221
204 222 #[inline]
205 223 fn deref(&self) -> &HgPath {
206 224 &HgPath::new(&self.inner)
207 225 }
208 226 }
209 227
210 228 impl From<Vec<u8>> for HgPathBuf {
211 229 fn from(vec: Vec<u8>) -> Self {
212 230 Self { inner: vec }
213 231 }
214 232 }
215 233
216 234 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
217 235 fn from(s: &T) -> HgPathBuf {
218 236 s.as_ref().to_owned()
219 237 }
220 238 }
221 239
222 240 impl Into<Vec<u8>> for HgPathBuf {
223 241 fn into(self) -> Vec<u8> {
224 242 self.inner
225 243 }
226 244 }
227 245
228 246 impl Borrow<HgPath> for HgPathBuf {
229 247 fn borrow(&self) -> &HgPath {
230 248 &HgPath::new(self.as_bytes())
231 249 }
232 250 }
233 251
234 252 impl ToOwned for HgPath {
235 253 type Owned = HgPathBuf;
236 254
237 255 fn to_owned(&self) -> HgPathBuf {
238 256 self.to_hg_path_buf()
239 257 }
240 258 }
241 259
242 260 impl AsRef<HgPath> for HgPath {
243 261 fn as_ref(&self) -> &HgPath {
244 262 self
245 263 }
246 264 }
247 265
248 266 impl AsRef<HgPath> for HgPathBuf {
249 267 fn as_ref(&self) -> &HgPath {
250 268 self
251 269 }
252 270 }
253 271
254 272 impl Extend<u8> for HgPathBuf {
255 273 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
256 274 self.inner.extend(iter);
257 275 }
258 276 }
259 277
260 278 /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
261 279 /// implemented, these conversion utils will have to work differently depending
262 280 /// on the repository encoding: either `UTF-8` or `MBCS`.
263 281
264 282 pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
265 283 hg_path: P,
266 284 ) -> Result<OsString, HgPathError> {
267 285 hg_path.as_ref().check_state()?;
268 286 let os_str;
269 287 #[cfg(unix)]
270 288 {
271 289 use std::os::unix::ffi::OsStrExt;
272 290 os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes());
273 291 }
274 292 // TODO Handle other platforms
275 293 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
276 294 Ok(os_str.to_os_string())
277 295 }
278 296
279 297 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
280 298 hg_path: P,
281 299 ) -> Result<PathBuf, HgPathError> {
282 300 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
283 301 }
284 302
285 303 pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
286 304 os_string: S,
287 305 ) -> Result<HgPathBuf, HgPathError> {
288 306 let buf;
289 307 #[cfg(unix)]
290 308 {
291 309 use std::os::unix::ffi::OsStrExt;
292 310 buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes());
293 311 }
294 312 // TODO Handle other platforms
295 313 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
296 314
297 315 buf.check_state()?;
298 316 Ok(buf)
299 317 }
300 318
301 319 pub fn path_to_hg_path_buf<P: AsRef<Path>>(
302 320 path: P,
303 321 ) -> Result<HgPathBuf, HgPathError> {
304 322 let buf;
305 323 let os_str = path.as_ref().as_os_str();
306 324 #[cfg(unix)]
307 325 {
308 326 use std::os::unix::ffi::OsStrExt;
309 327 buf = HgPathBuf::from_bytes(&os_str.as_bytes());
310 328 }
311 329 // TODO Handle other platforms
312 330 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
313 331
314 332 buf.check_state()?;
315 333 Ok(buf)
316 334 }
317 335
#[cfg(test)]
mod tests {
    use super::*;

    /// `check_state` reports the first defect with its position, and accepts
    /// paths that are unusual but well-formed.
    #[test]
    fn test_path_states() {
        assert_eq!(
            Err(HgPathError::LeadingSlash(b"/".to_vec())),
            HgPath::new(b"/").check_state()
        );
        assert_eq!(
            Err(HgPathError::ConsecutiveSlashes(b"a/b//c".to_vec(), 4)),
            HgPath::new(b"a/b//c").check_state()
        );
        assert_eq!(
            Err(HgPathError::ContainsNullByte(b"a/b/\0c".to_vec(), 4)),
            HgPath::new(b"a/b/\0c").check_state()
        );
        // TODO test HgPathError::DecodeError for the Windows implementation.
        assert!(HgPath::new(b"").is_valid());
        assert!(HgPath::new(b"a/b/c").is_valid());
        // Backslashes in paths are not significant, but allowed
        assert!(HgPath::new(br"a\b/c").is_valid());
        // Dots in paths are not significant, but allowed
        assert!(HgPath::new(b"a/b/../c/").is_valid());
        assert!(HgPath::new(b"./a/b/../c/").is_valid());
    }

    /// `bytes()` behaves like a regular double-ended slice iterator.
    #[test]
    fn test_iter() {
        let path = HgPath::new(b"a");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"a");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next_back());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(Some(&b'c'), iter.next_back());
        assert_eq!(Some(&b'b'), iter.next_back());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(Some(&b'b'), iter.next());
        assert_eq!(Some(&b'c'), iter.next());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let iter = path.bytes();
        let mut vec = Vec::new();
        vec.extend(iter);
        assert_eq!(vec![b'a', b'b', b'c'], vec);

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(2), iter.rposition(|c| *c == b'c'));

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(None, iter.rposition(|c| *c == b'd'));
    }

    /// `join` inserts a separator only when one is needed.
    #[test]
    fn test_join() {
        let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
        assert_eq!(b"a/b", path.as_bytes());

        let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
        assert_eq!(b"a/b/c", path.as_bytes());

        // No leading slash if empty before join
        let path = HgPathBuf::new().join(HgPath::new(b"b/c"));
        assert_eq!(b"b/c", path.as_bytes());

        // The leading slash is an invalid representation of an `HgPath`, but
        // it can happen. This creates another invalid representation of
        // consecutive slashes.
        // TODO What should be done in this case? Should we silently remove
        // the extra slash? Should we change the signature to a problematic
        // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
        // let the error happen upon filesystem interaction?
        let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", path.as_bytes());
        let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", path.as_bytes());
    }

    /// `relative_to` only strips bases that are empty or end with a slash.
    #[test]
    fn test_relative_to() {
        let path = HgPath::new(b"");
        let base = HgPath::new(b"");
        assert_eq!(Some(path), path.relative_to(base));

        let path = HgPath::new(b"path");
        let base = HgPath::new(b"");
        assert_eq!(Some(path), path.relative_to(base));

        let path = HgPath::new(b"a");
        let base = HgPath::new(b"b");
        assert_eq!(None, path.relative_to(base));

        let path = HgPath::new(b"a/b");
        let base = HgPath::new(b"a");
        assert_eq!(None, path.relative_to(base));

        let path = HgPath::new(b"a/b");
        let base = HgPath::new(b"a/");
        assert_eq!(Some(HgPath::new(b"b")), path.relative_to(base));

        let path = HgPath::new(b"nested/path/to/b");
        let base = HgPath::new(b"nested/path/");
        assert_eq!(Some(HgPath::new(b"to/b")), path.relative_to(base));

        let path = HgPath::new(b"ends/with/dir/");
        let base = HgPath::new(b"ends/");
        assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));

        // A directory relative to itself is the empty path
        let path = HgPath::new(b"a/");
        let base = HgPath::new(b"a/");
        assert_eq!(Some(HgPath::new(b"")), path.relative_to(base));
    }
}
General Comments 0
You need to be logged in to leave comments. Login now