##// END OF EJS Templates
rust-hg-path: implement more readable custom Debug for HgPath{,Buf}...
Martin von Zweigbergk -
r44360:4b3c8df1 default
parent child Browse files
Show More
@@ -1,464 +1,476 b''
1 1 // hg_path.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use std::borrow::Borrow;
9 9 use std::ffi::{OsStr, OsString};
10 10 use std::fmt;
11 11 use std::ops::Deref;
12 12 use std::path::{Path, PathBuf};
13 13
/// Reasons why the bytes of an `HgPath` are rejected by
/// `HgPath::check_state` or by the platform path conversions.
#[derive(Debug, Eq, PartialEq)]
pub enum HgPathError {
    /// Bytes from the invalid `HgPath`
    LeadingSlash(Vec<u8>),
    /// Bytes and index of the second slash
    ConsecutiveSlashes(Vec<u8>, usize),
    /// Bytes and index of the null byte
    ContainsNullByte(Vec<u8>, usize),
    /// Bytes
    DecodeError(Vec<u8>),
}

// Implementing `Display` (rather than `ToString` directly) keeps
// `to_string()` available through the standard blanket impl while also
// allowing the error to be used with `{}` in format strings.
impl fmt::Display for HgPathError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            HgPathError::LeadingSlash(bytes) => {
                write!(f, "Invalid HgPath '{:?}': has a leading slash.", bytes)
            }
            HgPathError::ConsecutiveSlashes(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
                bytes, pos
            ),
            HgPathError::ContainsNullByte(bytes, pos) => write!(
                f,
                "Invalid HgPath '{:?}': contains null byte at pos {}.",
                bytes, pos
            ),
            HgPathError::DecodeError(bytes) => {
                write!(f, "Invalid HgPath '{:?}': could not be decoded.", bytes)
            }
        }
    }
}
46 46
47 47 impl From<HgPathError> for std::io::Error {
48 48 fn from(e: HgPathError) -> Self {
49 49 std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
50 50 }
51 51 }
52 52
/// This is a repository-relative path (or canonical path):
///     - no null characters
///     - `/` separates directories
///     - no consecutive slashes
///     - no leading slash,
///     - no `.` nor `..` of special meaning
///     - stored in repository and shared across platforms
///
/// Note: there is no guarantee of any `HgPath` being well-formed at any point
/// in its lifetime for performance reasons and to ease ergonomics. It is
/// however checked using the `check_state` method before any file-system
/// operation.
///
/// This allows us to be encoding-transparent as much as possible, until really
/// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
/// or `Path`) whenever more complex operations are needed:
/// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
/// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
/// character encoding will be determined on a per-repository basis.
//
// `HgPath::new()` reinterprets a `&[u8]` as a `&HgPath` through a raw pointer
// cast. That is only sound when both types are guaranteed to share the same
// layout, which is exactly what `#[repr(transparent)]` provides for a struct
// with a single field.
//
// `Debug` is intentionally not derived: a hand-written impl prints the path
// as (lossy) text instead of a list of byte values.
#[derive(Eq, Ord, PartialEq, PartialOrd, Hash)]
#[repr(transparent)]
pub struct HgPath {
    inner: [u8],
}
84 84
85 85 impl HgPath {
86 86 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
87 87 unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
88 88 }
89 89 pub fn is_empty(&self) -> bool {
90 90 self.inner.is_empty()
91 91 }
92 92 pub fn len(&self) -> usize {
93 93 self.inner.len()
94 94 }
95 95 fn to_hg_path_buf(&self) -> HgPathBuf {
96 96 HgPathBuf {
97 97 inner: self.inner.to_owned(),
98 98 }
99 99 }
100 100 pub fn bytes(&self) -> std::slice::Iter<u8> {
101 101 self.inner.iter()
102 102 }
103 103 pub fn to_ascii_uppercase(&self) -> HgPathBuf {
104 104 HgPathBuf::from(self.inner.to_ascii_uppercase())
105 105 }
106 106 pub fn to_ascii_lowercase(&self) -> HgPathBuf {
107 107 HgPathBuf::from(self.inner.to_ascii_lowercase())
108 108 }
109 109 pub fn as_bytes(&self) -> &[u8] {
110 110 &self.inner
111 111 }
112 112 pub fn contains(&self, other: u8) -> bool {
113 113 self.inner.contains(&other)
114 114 }
115 115 pub fn starts_with(&self, needle: impl AsRef<HgPath>) -> bool {
116 116 self.inner.starts_with(needle.as_ref().as_bytes())
117 117 }
118 118 pub fn join<T: ?Sized + AsRef<HgPath>>(&self, other: &T) -> HgPathBuf {
119 119 let mut inner = self.inner.to_owned();
120 120 if inner.len() != 0 && inner.last() != Some(&b'/') {
121 121 inner.push(b'/');
122 122 }
123 123 inner.extend(other.as_ref().bytes());
124 124 HgPathBuf::from_bytes(&inner)
125 125 }
126 126 /// Given a base directory, returns the slice of `self` relative to the
127 127 /// base directory. If `base` is not a directory (does not end with a
128 128 /// `b'/'`), returns `None`.
129 129 pub fn relative_to(&self, base: impl AsRef<HgPath>) -> Option<&HgPath> {
130 130 let base = base.as_ref();
131 131 if base.is_empty() {
132 132 return Some(self);
133 133 }
134 134 let is_dir = base.as_bytes().ends_with(b"/");
135 135 if is_dir && self.starts_with(base) {
136 136 Some(HgPath::new(&self.inner[base.len()..]))
137 137 } else {
138 138 None
139 139 }
140 140 }
141 141 /// Checks for errors in the path, short-circuiting at the first one.
142 142 /// This generates fine-grained errors useful for debugging.
143 143 /// To simply check if the path is valid during tests, use `is_valid`.
144 144 pub fn check_state(&self) -> Result<(), HgPathError> {
145 145 if self.len() == 0 {
146 146 return Ok(());
147 147 }
148 148 let bytes = self.as_bytes();
149 149 let mut previous_byte = None;
150 150
151 151 if bytes[0] == b'/' {
152 152 return Err(HgPathError::LeadingSlash(bytes.to_vec()));
153 153 }
154 154 for (index, byte) in bytes.iter().enumerate() {
155 155 match byte {
156 156 0 => {
157 157 return Err(HgPathError::ContainsNullByte(
158 158 bytes.to_vec(),
159 159 index,
160 160 ))
161 161 }
162 162 b'/' => {
163 163 if previous_byte.is_some() && previous_byte == Some(b'/') {
164 164 return Err(HgPathError::ConsecutiveSlashes(
165 165 bytes.to_vec(),
166 166 index,
167 167 ));
168 168 }
169 169 }
170 170 _ => (),
171 171 };
172 172 previous_byte = Some(*byte);
173 173 }
174 174 Ok(())
175 175 }
176 176
177 177 #[cfg(test)]
178 178 /// Only usable during tests to force developers to handle invalid states
179 179 fn is_valid(&self) -> bool {
180 180 self.check_state().is_ok()
181 181 }
182 182 }
183 183
184 impl fmt::Debug for HgPath {
185 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
186 write!(f, "HgPath({:?})", String::from_utf8_lossy(&self.inner))
187 }
188 }
189
184 190 impl fmt::Display for HgPath {
185 191 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
186 192 write!(f, "{}", String::from_utf8_lossy(&self.inner))
187 193 }
188 194 }
189 195
/// Owned, growable counterpart of `HgPath`, backed by a `Vec<u8>`.
///
/// `Debug` is intentionally not derived: a hand-written impl prints the path
/// as (lossy) text instead of a list of byte values.
#[derive(Eq, Ord, Clone, PartialEq, PartialOrd, Default, Hash)]
pub struct HgPathBuf {
    inner: Vec<u8>,
}

impl HgPathBuf {
    /// Creates an empty path.
    pub fn new() -> Self {
        Self::default()
    }
    /// Appends a single byte, without any validation.
    pub fn push(&mut self, byte: u8) {
        self.inner.push(byte);
    }
    /// Creates an owned path by copying `s`, without any validation.
    pub fn from_bytes(s: &[u8]) -> HgPathBuf {
        HgPathBuf { inner: s.to_vec() }
    }
    /// Consumes the path and returns its bytes.
    pub fn into_vec(self) -> Vec<u8> {
        self.inner
    }
    /// Returns the bytes of the path.
    pub fn as_ref(&self) -> &[u8] {
        &self.inner
    }
}
212 218
219 impl fmt::Debug for HgPathBuf {
220 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
221 write!(f, "HgPathBuf({:?})", String::from_utf8_lossy(&self.inner))
222 }
223 }
224
213 225 impl fmt::Display for HgPathBuf {
214 226 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
215 227 write!(f, "{}", String::from_utf8_lossy(&self.inner))
216 228 }
217 229 }
218 230
219 231 impl Deref for HgPathBuf {
220 232 type Target = HgPath;
221 233
222 234 #[inline]
223 235 fn deref(&self) -> &HgPath {
224 236 &HgPath::new(&self.inner)
225 237 }
226 238 }
227 239
228 240 impl From<Vec<u8>> for HgPathBuf {
229 241 fn from(vec: Vec<u8>) -> Self {
230 242 Self { inner: vec }
231 243 }
232 244 }
233 245
234 246 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
235 247 fn from(s: &T) -> HgPathBuf {
236 248 s.as_ref().to_owned()
237 249 }
238 250 }
239 251
240 252 impl Into<Vec<u8>> for HgPathBuf {
241 253 fn into(self) -> Vec<u8> {
242 254 self.inner
243 255 }
244 256 }
245 257
246 258 impl Borrow<HgPath> for HgPathBuf {
247 259 fn borrow(&self) -> &HgPath {
248 260 &HgPath::new(self.as_bytes())
249 261 }
250 262 }
251 263
252 264 impl ToOwned for HgPath {
253 265 type Owned = HgPathBuf;
254 266
255 267 fn to_owned(&self) -> HgPathBuf {
256 268 self.to_hg_path_buf()
257 269 }
258 270 }
259 271
260 272 impl AsRef<HgPath> for HgPath {
261 273 fn as_ref(&self) -> &HgPath {
262 274 self
263 275 }
264 276 }
265 277
266 278 impl AsRef<HgPath> for HgPathBuf {
267 279 fn as_ref(&self) -> &HgPath {
268 280 self
269 281 }
270 282 }
271 283
272 284 impl Extend<u8> for HgPathBuf {
273 285 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
274 286 self.inner.extend(iter);
275 287 }
276 288 }
277 289
278 290 /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
279 291 /// implemented, these conversion utils will have to work differently depending
280 292 /// on the repository encoding: either `UTF-8` or `MBCS`.
281 293
282 294 pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
283 295 hg_path: P,
284 296 ) -> Result<OsString, HgPathError> {
285 297 hg_path.as_ref().check_state()?;
286 298 let os_str;
287 299 #[cfg(unix)]
288 300 {
289 301 use std::os::unix::ffi::OsStrExt;
290 302 os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes());
291 303 }
292 304 // TODO Handle other platforms
293 305 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
294 306 Ok(os_str.to_os_string())
295 307 }
296 308
297 309 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
298 310 hg_path: P,
299 311 ) -> Result<PathBuf, HgPathError> {
300 312 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
301 313 }
302 314
303 315 pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
304 316 os_string: S,
305 317 ) -> Result<HgPathBuf, HgPathError> {
306 318 let buf;
307 319 #[cfg(unix)]
308 320 {
309 321 use std::os::unix::ffi::OsStrExt;
310 322 buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes());
311 323 }
312 324 // TODO Handle other platforms
313 325 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
314 326
315 327 buf.check_state()?;
316 328 Ok(buf)
317 329 }
318 330
319 331 pub fn path_to_hg_path_buf<P: AsRef<Path>>(
320 332 path: P,
321 333 ) -> Result<HgPathBuf, HgPathError> {
322 334 let buf;
323 335 let os_str = path.as_ref().as_os_str();
324 336 #[cfg(unix)]
325 337 {
326 338 use std::os::unix::ffi::OsStrExt;
327 339 buf = HgPathBuf::from_bytes(&os_str.as_bytes());
328 340 }
329 341 // TODO Handle other platforms
330 342 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
331 343
332 344 buf.check_state()?;
333 345 Ok(buf)
334 346 }
335 347
#[cfg(test)]
mod tests {
    use super::*;

    /// `check_state` reports each kind of malformed path and accepts
    /// well-formed ones.
    #[test]
    fn test_path_states() {
        assert_eq!(
            Err(HgPathError::LeadingSlash(b"/".to_vec())),
            HgPath::new(b"/").check_state()
        );
        assert_eq!(
            Err(HgPathError::ConsecutiveSlashes(b"a/b//c".to_vec(), 4)),
            HgPath::new(b"a/b//c").check_state()
        );
        assert_eq!(
            Err(HgPathError::ContainsNullByte(b"a/b/\0c".to_vec(), 4)),
            HgPath::new(b"a/b/\0c").check_state()
        );
        // TODO test HgPathError::DecodeError for the Windows implementation.
        assert!(HgPath::new(b"").is_valid());
        assert!(HgPath::new(b"a/b/c").is_valid());
        // Backslashes in paths are not significant, but allowed
        assert!(HgPath::new(br"a\b/c").is_valid());
        // Dots in paths are not significant, but allowed
        assert!(HgPath::new(b"a/b/../c/").is_valid());
        assert!(HgPath::new(b"./a/b/../c/").is_valid());
    }

    /// `bytes()` behaves as a double-ended iterator over the path's bytes.
    #[test]
    fn test_iter() {
        let path = HgPath::new(b"a");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"a");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next_back());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(Some(&b'c'), iter.next_back());
        assert_eq!(Some(&b'b'), iter.next_back());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(Some(&b'b'), iter.next());
        assert_eq!(Some(&b'c'), iter.next());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        let path = HgPath::new(b"abc");
        let iter = path.bytes();
        let mut vec = Vec::new();
        vec.extend(iter);
        assert_eq!(vec![b'a', b'b', b'c'], vec);

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(2), iter.rposition(|c| *c == b'c'));

        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(None, iter.rposition(|c| *c == b'd'));
    }

    /// `join` inserts a separator only when one is needed.
    #[test]
    fn test_join() {
        let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
        assert_eq!(b"a/b", path.as_bytes());

        let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
        assert_eq!(b"a/b/c", path.as_bytes());

        // No leading slash if empty before join
        let path = HgPathBuf::new().join(HgPath::new(b"b/c"));
        assert_eq!(b"b/c", path.as_bytes());

        // The leading slash is an invalid representation of an `HgPath`, but
        // it can happen. This creates another invalid representation of
        // consecutive slashes.
        // TODO What should be done in this case? Should we silently remove
        // the extra slash? Should we change the signature to a problematic
        // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
        // let the error happen upon filesystem interaction?
        let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", path.as_bytes());
        let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
        assert_eq!(b"a//b", path.as_bytes());
    }

    /// `relative_to` only strips bases that are empty or end with a slash.
    #[test]
    fn test_relative_to() {
        let path = HgPath::new(b"");
        let base = HgPath::new(b"");
        assert_eq!(Some(path), path.relative_to(base));

        let path = HgPath::new(b"path");
        let base = HgPath::new(b"");
        assert_eq!(Some(path), path.relative_to(base));

        let path = HgPath::new(b"a");
        let base = HgPath::new(b"b");
        assert_eq!(None, path.relative_to(base));

        let path = HgPath::new(b"a/b");
        let base = HgPath::new(b"a");
        assert_eq!(None, path.relative_to(base));

        let path = HgPath::new(b"a/b");
        let base = HgPath::new(b"a/");
        assert_eq!(Some(HgPath::new(b"b")), path.relative_to(base));

        let path = HgPath::new(b"nested/path/to/b");
        let base = HgPath::new(b"nested/path/");
        assert_eq!(Some(HgPath::new(b"to/b")), path.relative_to(base));

        let path = HgPath::new(b"ends/with/dir/");
        let base = HgPath::new(b"ends/");
        assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));
    }

    /// The hand-written `Debug` impls print the path as text, not as a list
    /// of byte values.
    #[test]
    fn test_debug() {
        assert_eq!(
            r#"HgPath("a/b")"#,
            format!("{:?}", HgPath::new(b"a/b"))
        );
        assert_eq!(
            r#"HgPathBuf("a/b")"#,
            format!("{:?}", HgPathBuf::from_bytes(b"a/b"))
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now