##// END OF EJS Templates
rust-cross-platform: remove `unimplemented!` to get compile-time errors...
Raphaël Gomès -
r43525:60ee5842 default draft
parent child Browse files
Show More
@@ -1,104 +1,101 b''
1 1 // files.rs
2 2 //
3 3 // Copyright 2019
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 // Yuya Nishihara <yuya@tcha.org>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 //! Functions for fiddling with files.
11 11
12 12 use crate::utils::hg_path::{HgPath, HgPathBuf};
13 13 use std::iter::FusedIterator;
14 14
15 15 use std::path::Path;
16 16
17 17 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
18 18 let os_str;
19 19 #[cfg(unix)]
20 20 {
21 21 use std::os::unix::ffi::OsStrExt;
22 22 os_str = std::ffi::OsStr::from_bytes(bytes);
23 23 }
24 #[cfg(windows)]
25 {
26 // TODO: convert from Windows MBCS (ANSI encoding) to WTF8.
27 // Perhaps, the return type would have to be Result<PathBuf>.
28 unimplemented!()
29 }
24 // TODO Handle other platforms
25 // TODO: convert from Windows MBCS (ANSI encoding) to WTF8.
26 // Perhaps, the return type would have to be Result<PathBuf>.
30 27
31 28 Path::new(os_str)
32 29 }
33 30
34 31 /// An iterator over repository path yielding itself and its ancestors.
35 32 #[derive(Copy, Clone, Debug)]
36 33 pub struct Ancestors<'a> {
37 34 next: Option<&'a HgPath>,
38 35 }
39 36
40 37 impl<'a> Iterator for Ancestors<'a> {
41 38 type Item = &'a HgPath;
42 39
43 40 fn next(&mut self) -> Option<Self::Item> {
44 41 let next = self.next;
45 42 self.next = match self.next {
46 43 Some(s) if s.is_empty() => None,
47 44 Some(s) => {
48 45 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
49 46 Some(HgPath::new(&s.as_bytes()[..p]))
50 47 }
51 48 None => None,
52 49 };
53 50 next
54 51 }
55 52 }
56 53
57 54 impl<'a> FusedIterator for Ancestors<'a> {}
58 55
59 56 /// Returns an iterator yielding ancestor directories of the given repository
60 57 /// path.
61 58 ///
62 59 /// The path is separated by '/', and must not start with '/'.
63 60 ///
64 61 /// The path itself isn't included unless it is b"" (meaning the root
65 62 /// directory).
66 63 pub fn find_dirs<'a>(path: &'a HgPath) -> Ancestors<'a> {
67 64 let mut dirs = Ancestors { next: Some(path) };
68 65 if !path.is_empty() {
69 66 dirs.next(); // skip itself
70 67 }
71 68 dirs
72 69 }
73 70
74 71 /// TODO more than ASCII?
75 72 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
76 73 #[cfg(windows)] // NTFS compares via upper()
77 74 return path.to_ascii_uppercase();
78 75 #[cfg(unix)]
79 76 path.to_ascii_lowercase()
80 77 }
81 78
82 79 #[cfg(test)]
83 80 mod tests {
84 81 use super::*;
85 82
86 83 #[test]
87 84 fn find_dirs_some() {
88 85 let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
89 86 assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
90 87 assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
91 88 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
92 89 assert_eq!(dirs.next(), None);
93 90 assert_eq!(dirs.next(), None);
94 91 }
95 92
96 93 #[test]
97 94 fn find_dirs_empty() {
98 95 // looks weird, but mercurial.util.finddirs(b"") yields b""
99 96 let mut dirs = super::find_dirs(HgPath::new(b""));
100 97 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
101 98 assert_eq!(dirs.next(), None);
102 99 assert_eq!(dirs.next(), None);
103 100 }
104 101 }
@@ -1,409 +1,402 b''
1 1 // hg_path.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use std::borrow::Borrow;
9 9 use std::ffi::{OsStr, OsString};
10 10 use std::ops::Deref;
11 11 use std::path::{Path, PathBuf};
12 12
13 13 #[derive(Debug, Eq, PartialEq)]
14 14 pub enum HgPathError {
15 15 /// Bytes from the invalid `HgPath`
16 16 LeadingSlash(Vec<u8>),
17 17 /// Bytes and index of the second slash
18 18 ConsecutiveSlashes(Vec<u8>, usize),
19 19 /// Bytes and index of the null byte
20 20 ContainsNullByte(Vec<u8>, usize),
21 21 /// Bytes
22 22 DecodeError(Vec<u8>),
23 23 }
24 24
25 25 impl ToString for HgPathError {
26 26 fn to_string(&self) -> String {
27 27 match self {
28 28 HgPathError::LeadingSlash(bytes) => {
29 29 format!("Invalid HgPath '{:?}': has a leading slash.", bytes)
30 30 }
31 31 HgPathError::ConsecutiveSlashes(bytes, pos) => format!(
32 32 "Invalid HgPath '{:?}': consecutive slahes at pos {}.",
33 33 bytes, pos
34 34 ),
35 35 HgPathError::ContainsNullByte(bytes, pos) => format!(
36 36 "Invalid HgPath '{:?}': contains null byte at pos {}.",
37 37 bytes, pos
38 38 ),
39 39 HgPathError::DecodeError(bytes) => {
40 40 format!("Invalid HgPath '{:?}': could not be decoded.", bytes)
41 41 }
42 42 }
43 43 }
44 44 }
45 45
46 46 impl From<HgPathError> for std::io::Error {
47 47 fn from(e: HgPathError) -> Self {
48 48 std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
49 49 }
50 50 }
51 51
52 52 /// This is a repository-relative path (or canonical path):
53 53 /// - no null characters
54 54 /// - `/` separates directories
55 55 /// - no consecutive slashes
56 56 /// - no leading slash,
57 57 /// - no `.` nor `..` of special meaning
58 58 /// - stored in repository and shared across platforms
59 59 ///
60 60 /// Note: there is no guarantee of any `HgPath` being well-formed at any point
61 61 /// in its lifetime for performance reasons and to ease ergonomics. It is
62 62 /// however checked using the `check_state` method before any file-system
63 63 /// operation.
64 64 ///
65 65 /// This allows us to be encoding-transparent as much as possible, until really
66 66 /// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
67 67 /// or `Path`) whenever more complex operations are needed:
68 68 /// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
69 69 /// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
70 70 /// character encoding will be determined on a per-repository basis.
71 71 //
72 72 // FIXME: (adapted from a comment in the stdlib)
73 73 // `HgPath::new()` current implementation relies on `HgPath` being
74 74 // layout-compatible with `[u8]`.
75 75 // When attribute privacy is implemented, `HgPath` should be annotated as
76 76 // `#[repr(transparent)]`.
77 77 // Anyway, `HgPath` representation and layout are considered implementation
78 78 // detail, are not documented and must not be relied upon.
79 79 #[derive(Eq, Ord, PartialEq, PartialOrd, Debug, Hash)]
80 80 pub struct HgPath {
81 81 inner: [u8],
82 82 }
83 83
84 84 impl HgPath {
85 85 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
86 86 unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
87 87 }
88 88 pub fn is_empty(&self) -> bool {
89 89 self.inner.is_empty()
90 90 }
91 91 pub fn len(&self) -> usize {
92 92 self.inner.len()
93 93 }
94 94 fn to_hg_path_buf(&self) -> HgPathBuf {
95 95 HgPathBuf {
96 96 inner: self.inner.to_owned(),
97 97 }
98 98 }
99 99 pub fn bytes(&self) -> std::slice::Iter<u8> {
100 100 self.inner.iter()
101 101 }
102 102 pub fn to_ascii_uppercase(&self) -> HgPathBuf {
103 103 HgPathBuf::from(self.inner.to_ascii_uppercase())
104 104 }
105 105 pub fn to_ascii_lowercase(&self) -> HgPathBuf {
106 106 HgPathBuf::from(self.inner.to_ascii_lowercase())
107 107 }
108 108 pub fn as_bytes(&self) -> &[u8] {
109 109 &self.inner
110 110 }
111 111 pub fn contains(&self, other: u8) -> bool {
112 112 self.inner.contains(&other)
113 113 }
114 114 pub fn join<T: ?Sized + AsRef<HgPath>>(&self, other: &T) -> HgPathBuf {
115 115 let mut inner = self.inner.to_owned();
116 116 if inner.len() != 0 && inner.last() != Some(&b'/') {
117 117 inner.push(b'/');
118 118 }
119 119 inner.extend(other.as_ref().bytes());
120 120 HgPathBuf::from_bytes(&inner)
121 121 }
122 122 /// Checks for errors in the path, short-circuiting at the first one.
123 123 /// This generates fine-grained errors useful for debugging.
124 124 /// To simply check if the path is valid during tests, use `is_valid`.
125 125 pub fn check_state(&self) -> Result<(), HgPathError> {
126 126 if self.len() == 0 {
127 127 return Ok(());
128 128 }
129 129 let bytes = self.as_bytes();
130 130 let mut previous_byte = None;
131 131
132 132 if bytes[0] == b'/' {
133 133 return Err(HgPathError::LeadingSlash(bytes.to_vec()));
134 134 }
135 135 for (index, byte) in bytes.iter().enumerate() {
136 136 match byte {
137 137 0 => {
138 138 return Err(HgPathError::ContainsNullByte(
139 139 bytes.to_vec(),
140 140 index,
141 141 ))
142 142 }
143 143 b'/' => {
144 144 if previous_byte.is_some() && previous_byte == Some(b'/') {
145 145 return Err(HgPathError::ConsecutiveSlashes(
146 146 bytes.to_vec(),
147 147 index,
148 148 ));
149 149 }
150 150 }
151 151 _ => (),
152 152 };
153 153 previous_byte = Some(*byte);
154 154 }
155 155 Ok(())
156 156 }
157 157
158 158 #[cfg(test)]
159 159 /// Only usable during tests to force developers to handle invalid states
160 160 fn is_valid(&self) -> bool {
161 161 self.check_state().is_ok()
162 162 }
163 163 }
164 164
165 165 #[derive(Eq, Ord, Clone, PartialEq, PartialOrd, Debug, Hash)]
166 166 pub struct HgPathBuf {
167 167 inner: Vec<u8>,
168 168 }
169 169
170 170 impl HgPathBuf {
171 171 pub fn new() -> Self {
172 172 Self { inner: Vec::new() }
173 173 }
174 174 pub fn push(&mut self, byte: u8) {
175 175 self.inner.push(byte);
176 176 }
177 177 pub fn from_bytes(s: &[u8]) -> HgPathBuf {
178 178 HgPath::new(s).to_owned()
179 179 }
180 180 pub fn into_vec(self) -> Vec<u8> {
181 181 self.inner
182 182 }
183 183 pub fn as_ref(&self) -> &[u8] {
184 184 self.inner.as_ref()
185 185 }
186 186 }
187 187
188 188 impl Deref for HgPathBuf {
189 189 type Target = HgPath;
190 190
191 191 #[inline]
192 192 fn deref(&self) -> &HgPath {
193 193 &HgPath::new(&self.inner)
194 194 }
195 195 }
196 196
197 197 impl From<Vec<u8>> for HgPathBuf {
198 198 fn from(vec: Vec<u8>) -> Self {
199 199 Self { inner: vec }
200 200 }
201 201 }
202 202
203 203 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
204 204 fn from(s: &T) -> HgPathBuf {
205 205 s.as_ref().to_owned()
206 206 }
207 207 }
208 208
209 209 impl Into<Vec<u8>> for HgPathBuf {
210 210 fn into(self) -> Vec<u8> {
211 211 self.inner
212 212 }
213 213 }
214 214
215 215 impl Borrow<HgPath> for HgPathBuf {
216 216 fn borrow(&self) -> &HgPath {
217 217 &HgPath::new(self.as_bytes())
218 218 }
219 219 }
220 220
221 221 impl ToOwned for HgPath {
222 222 type Owned = HgPathBuf;
223 223
224 224 fn to_owned(&self) -> HgPathBuf {
225 225 self.to_hg_path_buf()
226 226 }
227 227 }
228 228
229 229 impl AsRef<HgPath> for HgPath {
230 230 fn as_ref(&self) -> &HgPath {
231 231 self
232 232 }
233 233 }
234 234
235 235 impl AsRef<HgPath> for HgPathBuf {
236 236 fn as_ref(&self) -> &HgPath {
237 237 self
238 238 }
239 239 }
240 240
241 241 impl Extend<u8> for HgPathBuf {
242 242 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
243 243 self.inner.extend(iter);
244 244 }
245 245 }
246 246
247 247 /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
248 248 /// implemented, these conversion utils will have to work differently depending
249 249 /// on the repository encoding: either `UTF-8` or `MBCS`.
250 250
251 251 pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
252 252 hg_path: P,
253 253 ) -> Result<OsString, HgPathError> {
254 254 hg_path.as_ref().check_state()?;
255 255 let os_str;
256 256 #[cfg(unix)]
257 257 {
258 258 use std::os::unix::ffi::OsStrExt;
259 259 os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes());
260 260 }
261 #[cfg(windows)]
262 {
263 // TODO: convert from Windows MBCS (ANSI encoding) to WTF8.
264 unimplemented!();
265 }
261 // TODO Handle other platforms
262 // TODO: convert from Windows MBCS (ANSI encoding) to WTF8.
266 263 Ok(os_str.to_os_string())
267 264 }
268 265
269 266 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
270 267 hg_path: P,
271 268 ) -> Result<PathBuf, HgPathError> {
272 269 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
273 270 }
274 271
275 272 pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
276 273 os_string: S,
277 274 ) -> Result<HgPathBuf, HgPathError> {
278 275 let buf;
279 276 #[cfg(unix)]
280 277 {
281 278 use std::os::unix::ffi::OsStrExt;
282 279 buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes());
283 280 }
284 #[cfg(windows)]
285 {
286 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
287 unimplemented!();
288 }
281 // TODO Handle other platforms
282 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
283
289 284 buf.check_state()?;
290 285 Ok(buf)
291 286 }
292 287
293 288 pub fn path_to_hg_path_buf<P: AsRef<Path>>(
294 289 path: P,
295 290 ) -> Result<HgPathBuf, HgPathError> {
296 291 let buf;
297 292 let os_str = path.as_ref().as_os_str();
298 293 #[cfg(unix)]
299 294 {
300 295 use std::os::unix::ffi::OsStrExt;
301 296 buf = HgPathBuf::from_bytes(&os_str.as_bytes());
302 297 }
303 #[cfg(windows)]
304 {
305 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
306 unimplemented!();
307 }
298 // TODO Handle other platforms
299 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
300
308 301 buf.check_state()?;
309 302 Ok(buf)
310 303 }
311 304
312 305 #[cfg(test)]
313 306 mod tests {
314 307 use super::*;
315 308
316 309 #[test]
317 310 fn test_path_states() {
318 311 assert_eq!(
319 312 Err(HgPathError::LeadingSlash(b"/".to_vec())),
320 313 HgPath::new(b"/").check_state()
321 314 );
322 315 assert_eq!(
323 316 Err(HgPathError::ConsecutiveSlashes(b"a/b//c".to_vec(), 4)),
324 317 HgPath::new(b"a/b//c").check_state()
325 318 );
326 319 assert_eq!(
327 320 Err(HgPathError::ContainsNullByte(b"a/b/\0c".to_vec(), 4)),
328 321 HgPath::new(b"a/b/\0c").check_state()
329 322 );
330 323 // TODO test HgPathError::DecodeError for the Windows implementation.
331 324 assert_eq!(true, HgPath::new(b"").is_valid());
332 325 assert_eq!(true, HgPath::new(b"a/b/c").is_valid());
333 326 // Backslashes in paths are not significant, but allowed
334 327 assert_eq!(true, HgPath::new(br"a\b/c").is_valid());
335 328 // Dots in paths are not significant, but allowed
336 329 assert_eq!(true, HgPath::new(b"a/b/../c/").is_valid());
337 330 assert_eq!(true, HgPath::new(b"./a/b/../c/").is_valid());
338 331 }
339 332
340 333 #[test]
341 334 fn test_iter() {
342 335 let path = HgPath::new(b"a");
343 336 let mut iter = path.bytes();
344 337 assert_eq!(Some(&b'a'), iter.next());
345 338 assert_eq!(None, iter.next_back());
346 339 assert_eq!(None, iter.next());
347 340
348 341 let path = HgPath::new(b"a");
349 342 let mut iter = path.bytes();
350 343 assert_eq!(Some(&b'a'), iter.next_back());
351 344 assert_eq!(None, iter.next_back());
352 345 assert_eq!(None, iter.next());
353 346
354 347 let path = HgPath::new(b"abc");
355 348 let mut iter = path.bytes();
356 349 assert_eq!(Some(&b'a'), iter.next());
357 350 assert_eq!(Some(&b'c'), iter.next_back());
358 351 assert_eq!(Some(&b'b'), iter.next_back());
359 352 assert_eq!(None, iter.next_back());
360 353 assert_eq!(None, iter.next());
361 354
362 355 let path = HgPath::new(b"abc");
363 356 let mut iter = path.bytes();
364 357 assert_eq!(Some(&b'a'), iter.next());
365 358 assert_eq!(Some(&b'b'), iter.next());
366 359 assert_eq!(Some(&b'c'), iter.next());
367 360 assert_eq!(None, iter.next_back());
368 361 assert_eq!(None, iter.next());
369 362
370 363 let path = HgPath::new(b"abc");
371 364 let iter = path.bytes();
372 365 let mut vec = Vec::new();
373 366 vec.extend(iter);
374 367 assert_eq!(vec![b'a', b'b', b'c'], vec);
375 368
376 369 let path = HgPath::new(b"abc");
377 370 let mut iter = path.bytes();
378 371 assert_eq!(Some(2), iter.rposition(|c| *c == b'c'));
379 372
380 373 let path = HgPath::new(b"abc");
381 374 let mut iter = path.bytes();
382 375 assert_eq!(None, iter.rposition(|c| *c == b'd'));
383 376 }
384 377
385 378 #[test]
386 379 fn test_join() {
387 380 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
388 381 assert_eq!(b"a/b", path.as_bytes());
389 382
390 383 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
391 384 assert_eq!(b"a/b/c", path.as_bytes());
392 385
393 386 // No leading slash if empty before join
394 387 let path = HgPathBuf::new().join(HgPath::new(b"b/c"));
395 388 assert_eq!(b"b/c", path.as_bytes());
396 389
397 390 // The leading slash is an invalid representation of an `HgPath`, but
398 391 // it can happen. This creates another invalid representation of
399 392 // consecutive slashes.
400 393 // TODO What should be done in this case? Should we silently remove
401 394 // the extra slash? Should we change the signature to a problematic
402 395 // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
403 396 // let the error happen upon filesystem interaction?
404 397 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
405 398 assert_eq!(b"a//b", path.as_bytes());
406 399 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
407 400 assert_eq!(b"a//b", path.as_bytes());
408 401 }
409 402 }
General Comments 0
You need to be logged in to leave comments. Login now