##// END OF EJS Templates
rust-pathauditor: use interior mutability for use in multi-threaded contexts...
Raphaël Gomès -
r45022:07d9fd60 default
parent child Browse files
Show More
@@ -1,770 +1,770 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust implementation of dirstate.status (dirstate.py).
9 9 //! It is currently missing a lot of functionality compared to the Python one
10 10 //! and will only be triggered in narrow cases.
11 11
12 12 use crate::{
13 13 dirstate::SIZE_FROM_OTHER_PARENT,
14 14 filepatterns::PatternFileWarning,
15 15 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
16 16 utils::{
17 17 files::{find_dirs, HgMetadata},
18 18 hg_path::{
19 19 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
20 20 HgPathError,
21 21 },
22 22 path_auditor::PathAuditor,
23 23 },
24 24 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
25 25 PatternError,
26 26 };
27 27 use lazy_static::lazy_static;
28 28 use rayon::prelude::*;
29 29 use std::collections::VecDeque;
30 30 use std::{
31 31 borrow::Cow,
32 32 collections::HashSet,
33 33 fs::{read_dir, DirEntry},
34 34 io::ErrorKind,
35 35 ops::Deref,
36 36 path::Path,
37 37 };
38 38
/// Wrong type of file from a `BadMatch`.
///
/// Note: a lot of those don't exist on all platforms.
#[derive(Debug)]
pub enum BadType {
    CharacterDevice,
    BlockDevice,
    FIFO,
    Socket,
    Directory,
    Unknown,
}

/// Human-readable name of the file type.
///
/// `Display` is implemented instead of `ToString`: `to_string()` is still
/// available through the standard blanket implementation, and the type can
/// additionally be used with `format!`/`write!`.
impl std::fmt::Display for BadType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            BadType::CharacterDevice => "character device",
            BadType::BlockDevice => "block device",
            BadType::FIFO => "fifo",
            BadType::Socket => "socket",
            BadType::Directory => "directory",
            BadType::Unknown => "unknown",
        };
        f.write_str(name)
    }
}
64 64
65 65 /// Was explicitly matched but cannot be found/accessed
66 66 #[derive(Debug)]
67 67 pub enum BadMatch {
68 68 OsError(i32),
69 69 BadType(BadType),
70 70 }
71 71
72 72 /// Marker enum used to dispatch new status entries into the right collections.
73 73 /// Is similar to `crate::EntryState`, but represents the transient state of
74 74 /// entries during the lifetime of a command.
75 75 #[derive(Debug)]
76 76 enum Dispatch {
77 77 Unsure,
78 78 Modified,
79 79 Added,
80 80 Removed,
81 81 Deleted,
82 82 Clean,
83 83 Unknown,
84 84 Ignored,
85 85 /// Empty dispatch, the file is not worth listing
86 86 None,
87 87 /// Was explicitly matched but cannot be found/accessed
88 88 Bad(BadMatch),
89 89 Directory {
90 90 /// True if the directory used to be a file in the dmap so we can say
91 91 /// that it's been removed.
92 92 was_file: bool,
93 93 },
94 94 }
95 95
/// Shorthand for the result of a fallible I/O operation.
type IoResult<T> = Result<T, std::io::Error>;
97 97
/// Returns `true` when `a` and `b` differ once reduced modulo 2^31.
///
/// Dates and times outside the 31-bit signed range are compared modulo 2^31.
/// This should prevent hg from behaving badly with very large files or
/// corrupt dates while still having a high probability of detecting changes.
/// (issue2608)
///
/// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
/// is not defined for `i32`, and there is no `As` trait. This forces the
/// caller to cast `b` as `i32`.
fn mod_compare(a: i32, b: i32) -> bool {
    // Bits that differ between `a` and `b`, with the sign bit masked out.
    let low_31_bits = i32::max_value();
    (a ^ b) & low_31_bits != 0
}
108 108
109 109 /// Return a sorted list containing information about the entries
110 110 /// in the directory.
111 111 ///
112 112 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
113 113 fn list_directory(
114 114 path: impl AsRef<Path>,
115 115 skip_dot_hg: bool,
116 116 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
117 117 let mut results = vec![];
118 118 let entries = read_dir(path.as_ref())?;
119 119
120 120 for entry in entries {
121 121 let entry = entry?;
122 122 let filename = os_string_to_hg_path_buf(entry.file_name())?;
123 123 let file_type = entry.file_type()?;
124 124 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
125 125 return Ok(vec![]);
126 126 } else {
127 127 results.push((HgPathBuf::from(filename), entry))
128 128 }
129 129 }
130 130
131 131 results.sort_unstable_by_key(|e| e.0.clone());
132 132 Ok(results)
133 133 }
134 134
135 135 /// The file corresponding to the dirstate entry was found on the filesystem.
136 136 fn dispatch_found(
137 137 filename: impl AsRef<HgPath>,
138 138 entry: DirstateEntry,
139 139 metadata: HgMetadata,
140 140 copy_map: &CopyMap,
141 141 options: StatusOptions,
142 142 ) -> Dispatch {
143 143 let DirstateEntry {
144 144 state,
145 145 mode,
146 146 mtime,
147 147 size,
148 148 } = entry;
149 149
150 150 let HgMetadata {
151 151 st_mode,
152 152 st_size,
153 153 st_mtime,
154 154 ..
155 155 } = metadata;
156 156
157 157 match state {
158 158 EntryState::Normal => {
159 159 let size_changed = mod_compare(size, st_size as i32);
160 160 let mode_changed =
161 161 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
162 162 let metadata_changed = size >= 0 && (size_changed || mode_changed);
163 163 let other_parent = size == SIZE_FROM_OTHER_PARENT;
164 164 if metadata_changed
165 165 || other_parent
166 166 || copy_map.contains_key(filename.as_ref())
167 167 {
168 168 Dispatch::Modified
169 169 } else if mod_compare(mtime, st_mtime as i32) {
170 170 Dispatch::Unsure
171 171 } else if st_mtime == options.last_normal_time {
172 172 // the file may have just been marked as normal and
173 173 // it may have changed in the same second without
174 174 // changing its size. This can happen if we quickly
175 175 // do multiple commits. Force lookup, so we don't
176 176 // miss such a racy file change.
177 177 Dispatch::Unsure
178 178 } else if options.list_clean {
179 179 Dispatch::Clean
180 180 } else {
181 181 Dispatch::None
182 182 }
183 183 }
184 184 EntryState::Merged => Dispatch::Modified,
185 185 EntryState::Added => Dispatch::Added,
186 186 EntryState::Removed => Dispatch::Removed,
187 187 EntryState::Unknown => Dispatch::Unknown,
188 188 }
189 189 }
190 190
191 191 /// The file corresponding to this Dirstate entry is missing.
192 192 fn dispatch_missing(state: EntryState) -> Dispatch {
193 193 match state {
194 194 // File was removed from the filesystem during commands
195 195 EntryState::Normal | EntryState::Merged | EntryState::Added => {
196 196 Dispatch::Deleted
197 197 }
198 198 // File was removed, everything is normal
199 199 EntryState::Removed => Dispatch::Removed,
200 200 // File is unknown to Mercurial, everything is normal
201 201 EntryState::Unknown => Dispatch::Unknown,
202 202 }
203 203 }
204 204
205 205 lazy_static! {
206 206 static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
207 207 let mut h = HashSet::new();
208 208 h.insert(HgPath::new(b""));
209 209 h
210 210 };
211 211 }
212 212
213 213 /// Get stat data about the files explicitly specified by match.
214 214 /// TODO subrepos
215 215 fn walk_explicit<'a>(
216 216 files: Option<&'a HashSet<&HgPath>>,
217 217 dmap: &'a DirstateMap,
218 218 root_dir: impl AsRef<Path> + Sync + Send + 'a,
219 219 options: StatusOptions,
220 220 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
221 221 files
222 222 .unwrap_or(&DEFAULT_WORK)
223 223 .par_iter()
224 224 .map(move |filename| {
225 225 // TODO normalization
226 226 let normalized = filename.as_ref();
227 227
228 228 let buf = match hg_path_to_path_buf(normalized) {
229 229 Ok(x) => x,
230 230 Err(e) => return Some(Err(e.into())),
231 231 };
232 232 let target = root_dir.as_ref().join(buf);
233 233 let st = target.symlink_metadata();
234 234 let in_dmap = dmap.get(normalized);
235 235 match st {
236 236 Ok(meta) => {
237 237 let file_type = meta.file_type();
238 238 return if file_type.is_file() || file_type.is_symlink() {
239 239 if let Some(entry) = in_dmap {
240 240 return Some(Ok((
241 241 normalized,
242 242 dispatch_found(
243 243 &normalized,
244 244 *entry,
245 245 HgMetadata::from_metadata(meta),
246 246 &dmap.copy_map,
247 247 options,
248 248 ),
249 249 )));
250 250 }
251 251 Some(Ok((normalized, Dispatch::Unknown)))
252 252 } else {
253 253 if file_type.is_dir() {
254 254 Some(Ok((
255 255 normalized,
256 256 Dispatch::Directory {
257 257 was_file: in_dmap.is_some(),
258 258 },
259 259 )))
260 260 } else {
261 261 Some(Ok((
262 262 normalized,
263 263 Dispatch::Bad(BadMatch::BadType(
264 264 // TODO do more than unknown
265 265 // Support for all `BadType` variant
266 266 // varies greatly between platforms.
267 267 // So far, no tests check the type and
268 268 // this should be good enough for most
269 269 // users.
270 270 BadType::Unknown,
271 271 )),
272 272 )))
273 273 }
274 274 };
275 275 }
276 276 Err(_) => {
277 277 if let Some(entry) = in_dmap {
278 278 return Some(Ok((
279 279 normalized,
280 280 dispatch_missing(entry.state),
281 281 )));
282 282 }
283 283 }
284 284 };
285 285 None
286 286 })
287 287 .flatten()
288 288 }
289 289
/// Options controlling what a status run compares and reports.
#[derive(Clone, Copy, Debug)]
pub struct StatusOptions {
    /// Remember the most recent modification timeslot for status, to make
    /// sure we won't miss future size-preserving file content modifications
    /// that happen within the same timeslot.
    pub last_normal_time: i64,
    /// Whether we are on a filesystem with UNIX-like exec flags
    pub check_exec: bool,
    /// Whether clean files should be listed
    pub list_clean: bool,
    /// Whether unknown files should be listed
    pub list_unknown: bool,
    /// Whether ignored files should be listed
    pub list_ignored: bool,
}
302 302
303 303 /// Dispatch a single file found during `traverse`.
304 304 /// If `file` is a folder that needs to be traversed, it will be pushed into
305 305 /// `work`.
306 306 fn traverse_worker<'a>(
307 307 work: &mut VecDeque<HgPathBuf>,
308 308 matcher: &impl Matcher,
309 309 dmap: &DirstateMap,
310 310 filename: impl AsRef<HgPath>,
311 311 dir_entry: &DirEntry,
312 312 ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
313 313 dir_ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
314 314 options: StatusOptions,
315 315 ) -> Option<IoResult<(Cow<'a, HgPath>, Dispatch)>> {
316 316 let file_type = match dir_entry.file_type() {
317 317 Ok(x) => x,
318 318 Err(e) => return Some(Err(e.into())),
319 319 };
320 320 let filename = filename.as_ref();
321 321 let entry_option = dmap.get(filename);
322 322
323 323 if file_type.is_dir() {
324 324 // Do we need to traverse it?
325 325 if !ignore_fn(&filename) || options.list_ignored {
326 326 work.push_front(filename.to_owned());
327 327 }
328 328 // Nested `if` until `rust-lang/rust#53668` is stable
329 329 if let Some(entry) = entry_option {
330 330 // Used to be a file, is now a folder
331 331 if matcher.matches_everything() || matcher.matches(&filename) {
332 332 return Some(Ok((
333 333 Cow::Owned(filename.to_owned()),
334 334 dispatch_missing(entry.state),
335 335 )));
336 336 }
337 337 }
338 338 } else if file_type.is_file() || file_type.is_symlink() {
339 339 if let Some(entry) = entry_option {
340 340 if matcher.matches_everything() || matcher.matches(&filename) {
341 341 let metadata = match dir_entry.metadata() {
342 342 Ok(x) => x,
343 343 Err(e) => return Some(Err(e.into())),
344 344 };
345 345 return Some(Ok((
346 346 Cow::Owned(filename.to_owned()),
347 347 dispatch_found(
348 348 &filename,
349 349 *entry,
350 350 HgMetadata::from_metadata(metadata),
351 351 &dmap.copy_map,
352 352 options,
353 353 ),
354 354 )));
355 355 }
356 356 } else if (matcher.matches_everything() || matcher.matches(&filename))
357 357 && !ignore_fn(&filename)
358 358 {
359 359 if (options.list_ignored || matcher.exact_match(&filename))
360 360 && dir_ignore_fn(&filename)
361 361 {
362 362 if options.list_ignored {
363 363 return Some(Ok((
364 364 Cow::Owned(filename.to_owned()),
365 365 Dispatch::Ignored,
366 366 )));
367 367 }
368 368 } else {
369 369 return Some(Ok((
370 370 Cow::Owned(filename.to_owned()),
371 371 Dispatch::Unknown,
372 372 )));
373 373 }
374 374 }
375 375 } else if let Some(entry) = entry_option {
376 376 // Used to be a file or a folder, now something else.
377 377 if matcher.matches_everything() || matcher.matches(&filename) {
378 378 return Some(Ok((
379 379 Cow::Owned(filename.to_owned()),
380 380 dispatch_missing(entry.state),
381 381 )));
382 382 }
383 383 }
384 384 None
385 385 }
386 386
387 387 /// Walk the working directory recursively to look for changes compared to the
388 388 /// current `DirstateMap`.
389 389 fn traverse<'a>(
390 390 matcher: &(impl Matcher + Sync),
391 391 root_dir: impl AsRef<Path>,
392 392 dmap: &DirstateMap,
393 393 path: impl AsRef<HgPath>,
394 394 old_results: FastHashMap<Cow<'a, HgPath>, Dispatch>,
395 395 ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
396 396 dir_ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
397 397 options: StatusOptions,
398 398 ) -> IoResult<FastHashMap<Cow<'a, HgPath>, Dispatch>> {
399 399 let root_dir = root_dir.as_ref();
400 400 let mut new_results = FastHashMap::default();
401 401
402 402 let mut work = VecDeque::new();
403 403 work.push_front(path.as_ref().to_owned());
404 404
405 405 while let Some(ref directory) = work.pop_front() {
406 406 if directory.as_bytes() == b".hg" {
407 407 continue;
408 408 }
409 409 let visit_entries = match matcher.visit_children_set(directory) {
410 410 VisitChildrenSet::Empty => continue,
411 411 VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
412 412 VisitChildrenSet::Set(set) => Some(set),
413 413 };
414 414 let buf = hg_path_to_path_buf(directory)?;
415 415 let dir_path = root_dir.join(buf);
416 416
417 417 let skip_dot_hg = !directory.as_bytes().is_empty();
418 418 let entries = match list_directory(dir_path, skip_dot_hg) {
419 419 Err(e) => match e.kind() {
420 420 ErrorKind::NotFound | ErrorKind::PermissionDenied => {
421 421 new_results.insert(
422 422 Cow::Owned(directory.to_owned()),
423 423 Dispatch::Bad(BadMatch::OsError(
424 424 // Unwrapping here is OK because the error always
425 425 // is a real os error
426 426 e.raw_os_error().unwrap(),
427 427 )),
428 428 );
429 429 continue;
430 430 }
431 431 _ => return Err(e),
432 432 },
433 433 Ok(entries) => entries,
434 434 };
435 435
436 436 for (filename, dir_entry) in entries {
437 437 if let Some(ref set) = visit_entries {
438 438 if !set.contains(filename.deref()) {
439 439 continue;
440 440 }
441 441 }
442 442 // TODO normalize
443 443 let filename = if directory.is_empty() {
444 444 filename.to_owned()
445 445 } else {
446 446 directory.join(&filename)
447 447 };
448 448
449 449 if !old_results.contains_key(filename.deref()) {
450 450 if let Some((res, dispatch)) = traverse_worker(
451 451 &mut work,
452 452 matcher,
453 453 &dmap,
454 454 &filename,
455 455 &dir_entry,
456 456 &ignore_fn,
457 457 &dir_ignore_fn,
458 458 options,
459 459 )
460 460 .transpose()?
461 461 {
462 462 new_results.insert(res, dispatch);
463 463 }
464 464 }
465 465 }
466 466 }
467 467
468 468 new_results.extend(old_results.into_iter());
469 469
470 470 Ok(new_results)
471 471 }
472 472
473 473 /// Stat all entries in the `DirstateMap` and mark them for dispatch.
474 474 fn stat_dmap_entries(
475 475 dmap: &DirstateMap,
476 476 root_dir: impl AsRef<Path> + Sync + Send,
477 477 options: StatusOptions,
478 478 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
479 479 dmap.par_iter().map(move |(filename, entry)| {
480 480 let filename: &HgPath = filename;
481 481 let filename_as_path = hg_path_to_path_buf(filename)?;
482 482 let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();
483 483
484 484 match meta {
485 485 Ok(ref m)
486 486 if !(m.file_type().is_file()
487 487 || m.file_type().is_symlink()) =>
488 488 {
489 489 Ok((filename, dispatch_missing(entry.state)))
490 490 }
491 491 Ok(m) => Ok((
492 492 filename,
493 493 dispatch_found(
494 494 filename,
495 495 *entry,
496 496 HgMetadata::from_metadata(m),
497 497 &dmap.copy_map,
498 498 options,
499 499 ),
500 500 )),
501 501 Err(ref e)
502 502 if e.kind() == ErrorKind::NotFound
503 503 || e.raw_os_error() == Some(20) =>
504 504 {
505 505 // Rust does not yet have an `ErrorKind` for
506 506 // `NotADirectory` (errno 20)
507 507 // It happens if the dirstate contains `foo/bar` and
508 508 // foo is not a directory
509 509 Ok((filename, dispatch_missing(entry.state)))
510 510 }
511 511 Err(e) => Err(e),
512 512 }
513 513 })
514 514 }
515 515
516 516 pub struct DirstateStatus<'a> {
517 517 pub modified: Vec<Cow<'a, HgPath>>,
518 518 pub added: Vec<Cow<'a, HgPath>>,
519 519 pub removed: Vec<Cow<'a, HgPath>>,
520 520 pub deleted: Vec<Cow<'a, HgPath>>,
521 521 pub clean: Vec<Cow<'a, HgPath>>,
522 522 pub ignored: Vec<Cow<'a, HgPath>>,
523 523 pub unknown: Vec<Cow<'a, HgPath>>,
524 524 pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
525 525 }
526 526
527 527 fn build_response<'a>(
528 528 results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
529 529 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
530 530 let mut lookup = vec![];
531 531 let mut modified = vec![];
532 532 let mut added = vec![];
533 533 let mut removed = vec![];
534 534 let mut deleted = vec![];
535 535 let mut clean = vec![];
536 536 let mut ignored = vec![];
537 537 let mut unknown = vec![];
538 538 let mut bad = vec![];
539 539
540 540 for (filename, dispatch) in results.into_iter() {
541 541 match dispatch {
542 542 Dispatch::Unknown => unknown.push(filename),
543 543 Dispatch::Unsure => lookup.push(filename),
544 544 Dispatch::Modified => modified.push(filename),
545 545 Dispatch::Added => added.push(filename),
546 546 Dispatch::Removed => removed.push(filename),
547 547 Dispatch::Deleted => deleted.push(filename),
548 548 Dispatch::Clean => clean.push(filename),
549 549 Dispatch::Ignored => ignored.push(filename),
550 550 Dispatch::None => {}
551 551 Dispatch::Bad(reason) => bad.push((filename, reason)),
552 552 Dispatch::Directory { .. } => {}
553 553 }
554 554 }
555 555
556 556 (
557 557 lookup,
558 558 DirstateStatus {
559 559 modified,
560 560 added,
561 561 removed,
562 562 deleted,
563 563 clean,
564 564 ignored,
565 565 unknown,
566 566 bad,
567 567 },
568 568 )
569 569 }
570 570
571 571 pub enum StatusError {
572 572 IO(std::io::Error),
573 573 Path(HgPathError),
574 574 Pattern(PatternError),
575 575 }
576 576
577 577 pub type StatusResult<T> = Result<T, StatusError>;
578 578
579 579 impl From<PatternError> for StatusError {
580 580 fn from(e: PatternError) -> Self {
581 581 StatusError::Pattern(e)
582 582 }
583 583 }
584 584 impl From<HgPathError> for StatusError {
585 585 fn from(e: HgPathError) -> Self {
586 586 StatusError::Path(e)
587 587 }
588 588 }
589 589 impl From<std::io::Error> for StatusError {
590 590 fn from(e: std::io::Error) -> Self {
591 591 StatusError::IO(e)
592 592 }
593 593 }
594 594
595 595 impl ToString for StatusError {
596 596 fn to_string(&self) -> String {
597 597 match self {
598 598 StatusError::IO(e) => e.to_string(),
599 599 StatusError::Path(e) => e.to_string(),
600 600 StatusError::Pattern(e) => e.to_string(),
601 601 }
602 602 }
603 603 }
604 604
605 605 /// Get the status of files in the working directory.
606 606 ///
607 607 /// This is the current entry-point for `hg-core` and is realistically unusable
608 608 /// outside of a Python context because its arguments need to provide a lot of
609 609 /// information that will not be necessary in the future.
610 610 pub fn status<'a: 'c, 'b: 'c, 'c>(
611 611 dmap: &'a DirstateMap,
612 612 matcher: &'b (impl Matcher + Sync),
613 613 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c,
614 614 ignore_files: &[impl AsRef<Path> + 'c],
615 615 options: StatusOptions,
616 616 ) -> StatusResult<(
617 617 (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>),
618 618 Vec<PatternFileWarning>,
619 619 )> {
620 620 let (ignore_fn, warnings) = get_ignore_function(&ignore_files, root_dir)?;
621 621
622 622 // Is the path or one of its ancestors ignored?
623 623 let dir_ignore_fn = |dir: &_| {
624 624 if ignore_fn(dir) {
625 625 true
626 626 } else {
627 627 for p in find_dirs(dir) {
628 628 if ignore_fn(p) {
629 629 return true;
630 630 }
631 631 }
632 632 false
633 633 }
634 634 };
635 635
636 636 let files = matcher.file_set();
637 637
638 638 // Step 1: check the files explicitly mentioned by the user
639 639 let explicit = walk_explicit(files, &dmap, root_dir, options);
640 640 let (work, mut results): (Vec<_>, FastHashMap<_, _>) = explicit
641 641 .filter_map(Result::ok)
642 642 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch))
643 643 .partition(|(_, dispatch)| match dispatch {
644 644 Dispatch::Directory { .. } => true,
645 645 _ => false,
646 646 });
647 647
648 648 // Step 2: recursively check the working directory for changes if needed
649 649 for (dir, dispatch) in work {
650 650 match dispatch {
651 651 Dispatch::Directory { was_file } => {
652 652 if was_file {
653 653 results.insert(dir.to_owned(), Dispatch::Removed);
654 654 }
655 655 if options.list_ignored
656 656 || options.list_unknown && !dir_ignore_fn(&dir)
657 657 {
658 658 results = traverse(
659 659 matcher,
660 660 root_dir,
661 661 &dmap,
662 662 &dir,
663 663 results,
664 664 &ignore_fn,
665 665 &dir_ignore_fn,
666 666 options,
667 667 )?;
668 668 }
669 669 }
670 670 _ => unreachable!("There can only be directories in `work`"),
671 671 }
672 672 }
673 673
674 674 if !matcher.is_exact() {
675 675 // Step 3: Check the remaining files from the dmap.
676 676 // If a dmap file is not in results yet, it was either
677 677 // a) not matched b) ignored, c) missing, or d) under a
678 678 // symlink directory.
679 679
680 680 if options.list_unknown {
681 681 let to_visit: Box<dyn Iterator<Item = (&HgPath, &DirstateEntry)>> =
682 682 if results.is_empty() && matcher.matches_everything() {
683 683 Box::new(dmap.iter().map(|(f, e)| (f.deref(), e)))
684 684 } else {
685 685 Box::new(dmap.iter().filter_map(|(f, e)| {
686 686 if !results.contains_key(f.deref())
687 687 && matcher.matches(f)
688 688 {
689 689 Some((f.deref(), e))
690 690 } else {
691 691 None
692 692 }
693 693 }))
694 694 };
695 695 let mut to_visit: Vec<_> = to_visit.collect();
696 696 to_visit.sort_by(|a, b| a.0.cmp(&b.0));
697 697
698 698 // We walked all dirs under the roots that weren't ignored, and
699 699 // everything that matched was stat'ed and is already in results.
700 700 // The rest must thus be ignored or under a symlink.
701 let mut path_auditor = PathAuditor::new(root_dir);
701 let path_auditor = PathAuditor::new(root_dir);
702 702
703 703 for (ref filename, entry) in to_visit {
704 704 // Report ignored items in the dmap as long as they are not
705 705 // under a symlink directory.
706 706 if path_auditor.check(filename) {
707 707 // TODO normalize for case-insensitive filesystems
708 708 let buf = hg_path_to_path_buf(filename)?;
709 709 results.insert(
710 710 Cow::Borrowed(filename),
711 711 match root_dir.as_ref().join(&buf).symlink_metadata() {
712 712 // File was just ignored, no links, and exists
713 713 Ok(meta) => {
714 714 let metadata = HgMetadata::from_metadata(meta);
715 715 dispatch_found(
716 716 filename,
717 717 *entry,
718 718 metadata,
719 719 &dmap.copy_map,
720 720 options,
721 721 )
722 722 }
723 723 // File doesn't exist
724 724 Err(_) => dispatch_missing(entry.state),
725 725 },
726 726 );
727 727 } else {
728 728 // It's either missing or under a symlink directory which
729 729 // we, in this case, report as missing.
730 730 results.insert(
731 731 Cow::Borrowed(filename),
732 732 dispatch_missing(entry.state),
733 733 );
734 734 }
735 735 }
736 736 } else {
737 737 // We may not have walked the full directory tree above, so stat
738 738 // and check everything we missed.
739 739 let stat_results = stat_dmap_entries(&dmap, root_dir, options);
740 740 results.par_extend(stat_results.flatten().map(
741 741 |(filename, dispatch)| (Cow::Borrowed(filename), dispatch),
742 742 ));
743 743 }
744 744 }
745 745
746 746 let results = results.into_iter().filter_map(|(filename, dispatch)| {
747 747 match dispatch {
748 748 Dispatch::Bad(_) => return Some((filename, dispatch)),
749 749 _ => {}
750 750 };
751 751 // TODO do this in //, not at the end
752 752 if !dmap.contains_key(filename.deref()) {
753 753 if (options.list_ignored || matcher.exact_match(&filename))
754 754 && dir_ignore_fn(&filename)
755 755 {
756 756 if options.list_ignored {
757 757 return Some((filename.to_owned(), Dispatch::Ignored));
758 758 }
759 759 } else {
760 760 if !ignore_fn(&filename) {
761 761 return Some((filename.to_owned(), Dispatch::Unknown));
762 762 }
763 763 }
764 764 return None;
765 765 }
766 766 Some((filename, dispatch))
767 767 });
768 768
769 769 Ok((build_response(results), warnings))
770 770 }
@@ -1,384 +1,384 b''
1 1 // files.rs
2 2 //
3 3 // Copyright 2019
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 // Yuya Nishihara <yuya@tcha.org>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 //! Functions for fiddling with files.
11 11
12 12 use crate::utils::{
13 13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
14 14 path_auditor::PathAuditor,
15 15 replace_slice,
16 16 };
17 17 use lazy_static::lazy_static;
18 18 use same_file::is_same_file;
19 19 use std::borrow::ToOwned;
20 20 use std::fs::Metadata;
21 21 use std::iter::FusedIterator;
22 22 use std::ops::Deref;
23 23 use std::path::{Path, PathBuf};
24 24
/// Borrow `bytes` as a `Path`, without copying.
///
/// Only implemented for UNIX-like platforms for now.
pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
    #[cfg(unix)]
    let os_str =
        <std::ffi::OsStr as std::os::unix::ffi::OsStrExt>::from_bytes(bytes);
    // TODO Handle other platforms
    // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
    // Perhaps, the return type would have to be Result<PathBuf>.

    Path::new(os_str)
}
38 38
/// Copy the raw bytes of `path` into a new vector.
// TODO: need to convert from WTF8 to MBCS bytes on Windows.
// that's why Vec<u8> is returned.
#[cfg(unix)]
pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
    let os_str = path.as_ref().as_os_str();
    std::os::unix::ffi::OsStrExt::as_bytes(os_str).to_owned()
}
46 46
47 47 /// An iterator over repository path yielding itself and its ancestors.
48 48 #[derive(Copy, Clone, Debug)]
49 49 pub struct Ancestors<'a> {
50 50 next: Option<&'a HgPath>,
51 51 }
52 52
53 53 impl<'a> Iterator for Ancestors<'a> {
54 54 type Item = &'a HgPath;
55 55
56 56 fn next(&mut self) -> Option<Self::Item> {
57 57 let next = self.next;
58 58 self.next = match self.next {
59 59 Some(s) if s.is_empty() => None,
60 60 Some(s) => {
61 61 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
62 62 Some(HgPath::new(&s.as_bytes()[..p]))
63 63 }
64 64 None => None,
65 65 };
66 66 next
67 67 }
68 68 }
69 69
70 70 impl<'a> FusedIterator for Ancestors<'a> {}
71 71
72 72 /// An iterator over repository path yielding itself and its ancestors.
73 73 #[derive(Copy, Clone, Debug)]
74 74 pub(crate) struct AncestorsWithBase<'a> {
75 75 next: Option<(&'a HgPath, &'a HgPath)>,
76 76 }
77 77
78 78 impl<'a> Iterator for AncestorsWithBase<'a> {
79 79 type Item = (&'a HgPath, &'a HgPath);
80 80
81 81 fn next(&mut self) -> Option<Self::Item> {
82 82 let next = self.next;
83 83 self.next = match self.next {
84 84 Some((s, _)) if s.is_empty() => None,
85 85 Some((s, _)) => Some(s.split_filename()),
86 86 None => None,
87 87 };
88 88 next
89 89 }
90 90 }
91 91
92 92 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
93 93
94 94 /// Returns an iterator yielding ancestor directories of the given repository
95 95 /// path.
96 96 ///
97 97 /// The path is separated by '/', and must not start with '/'.
98 98 ///
99 99 /// The path itself isn't included unless it is b"" (meaning the root
100 100 /// directory.)
101 101 pub fn find_dirs<'a>(path: &'a HgPath) -> Ancestors<'a> {
102 102 let mut dirs = Ancestors { next: Some(path) };
103 103 if !path.is_empty() {
104 104 dirs.next(); // skip itself
105 105 }
106 106 dirs
107 107 }
108 108
109 109 /// Returns an iterator yielding ancestor directories of the given repository
110 110 /// path.
111 111 ///
112 112 /// The path is separated by '/', and must not start with '/'.
113 113 ///
114 114 /// The path itself isn't included unless it is b"" (meaning the root
115 115 /// directory.)
116 116 pub(crate) fn find_dirs_with_base<'a>(
117 117 path: &'a HgPath,
118 118 ) -> AncestorsWithBase<'a> {
119 119 let mut dirs = AncestorsWithBase {
120 120 next: Some((path, HgPath::new(b""))),
121 121 };
122 122 if !path.is_empty() {
123 123 dirs.next(); // skip itself
124 124 }
125 125 dirs
126 126 }
127 127
128 128 /// TODO more than ASCII?
129 129 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
130 130 #[cfg(windows)] // NTFS compares via upper()
131 131 return path.to_ascii_uppercase();
132 132 #[cfg(unix)]
133 133 path.to_ascii_lowercase()
134 134 }
135 135
136 136 lazy_static! {
137 137 static ref IGNORED_CHARS: Vec<Vec<u8>> = {
138 138 [
139 139 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
140 140 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
141 141 ]
142 142 .iter()
143 143 .map(|code| {
144 144 std::char::from_u32(*code)
145 145 .unwrap()
146 146 .encode_utf8(&mut [0; 3])
147 147 .bytes()
148 148 .collect()
149 149 })
150 150 .collect()
151 151 };
152 152 }
153 153
154 154 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
155 155 let mut buf = bytes.to_owned();
156 156 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
157 157 if needs_escaping {
158 158 for forbidden in IGNORED_CHARS.iter() {
159 159 replace_slice(&mut buf, forbidden, &[])
160 160 }
161 161 buf
162 162 } else {
163 163 buf
164 164 }
165 165 }
166 166
167 167 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
168 168 hfs_ignore_clean(&bytes.to_ascii_lowercase())
169 169 }
170 170
/// The subset of a file's stat information kept by this crate.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct HgMetadata {
    pub st_dev: u64,
    pub st_mode: u32,
    pub st_nlink: u64,
    pub st_size: u64,
    pub st_mtime: i64,
    pub st_ctime: i64,
}

// TODO support other platforms
#[cfg(unix)]
impl HgMetadata {
    /// Extract the fields of interest from a standard library `Metadata`.
    pub fn from_metadata(metadata: Metadata) -> Self {
        use std::os::unix::fs::MetadataExt;

        let stat = &metadata;
        HgMetadata {
            st_dev: stat.dev(),
            st_mode: stat.mode(),
            st_nlink: stat.nlink(),
            st_size: stat.size(),
            st_mtime: stat.mtime(),
            st_ctime: stat.ctime(),
        }
    }
}
196 196
197 197 /// Returns the canonical path of `name`, given `cwd` and `root`
198 198 pub fn canonical_path(
199 199 root: impl AsRef<Path>,
200 200 cwd: impl AsRef<Path>,
201 201 name: impl AsRef<Path>,
202 202 ) -> Result<PathBuf, HgPathError> {
203 203 // TODO add missing normalization for other platforms
204 204 let root = root.as_ref();
205 205 let cwd = cwd.as_ref();
206 206 let name = name.as_ref();
207 207
208 208 let name = if !name.is_absolute() {
209 209 root.join(&cwd).join(&name)
210 210 } else {
211 211 name.to_owned()
212 212 };
213 let mut auditor = PathAuditor::new(&root);
213 let auditor = PathAuditor::new(&root);
214 214 if name != root && name.starts_with(&root) {
215 215 let name = name.strip_prefix(&root).unwrap();
216 216 auditor.audit_path(path_to_hg_path_buf(name)?)?;
217 217 return Ok(name.to_owned());
218 218 } else if name == root {
219 219 return Ok("".into());
220 220 } else {
221 221 // Determine whether `name' is in the hierarchy at or beneath `root',
222 222 // by iterating name=name.parent() until it returns `None` (can't
223 223 // check name == '/', because that doesn't work on windows).
224 224 let mut name = name.deref();
225 225 let original_name = name.to_owned();
226 226 loop {
227 227 let same = is_same_file(&name, &root).unwrap_or(false);
228 228 if same {
229 229 if name == original_name {
230 230 // `name` was actually the same as root (maybe a symlink)
231 231 return Ok("".into());
232 232 }
233 233 // `name` is a symlink to root, so `original_name` is under
234 234 // root
235 235 let rel_path = original_name.strip_prefix(&name).unwrap();
236 236 auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
237 237 return Ok(rel_path.to_owned());
238 238 }
239 239 name = match name.parent() {
240 240 None => break,
241 241 Some(p) => p,
242 242 };
243 243 }
244 244 // TODO hint to the user about using --cwd
245 245 // Bubble up the responsibility to Python for now
246 246 Err(HgPathError::NotUnderRoot {
247 247 path: original_name.to_owned(),
248 248 root: root.to_owned(),
249 249 })
250 250 }
251 251 }
252 252
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Each ancestor directory is yielded, deepest first, ending with the
    /// empty path; the iterator then stays exhausted.
    #[test]
    fn find_dirs_some() {
        let mut ancestors = super::find_dirs(HgPath::new(b"foo/bar/baz"));
        assert_eq!(ancestors.next(), Some(HgPath::new(b"foo/bar")));
        assert_eq!(ancestors.next(), Some(HgPath::new(b"foo")));
        assert_eq!(ancestors.next(), Some(HgPath::new(b"")));
        assert_eq!(ancestors.next(), None);
        assert_eq!(ancestors.next(), None);
    }

    #[test]
    fn find_dirs_empty() {
        // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
        let mut ancestors = super::find_dirs(HgPath::new(b""));
        assert_eq!(ancestors.next(), Some(HgPath::new(b"")));
        assert_eq!(ancestors.next(), None);
        assert_eq!(ancestors.next(), None);
    }

    /// Same walk as `find_dirs`, but each step also carries the component
    /// that was just stripped.
    #[test]
    fn test_find_dirs_with_base_some() {
        let mut pairs =
            super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
        assert_eq!(
            pairs.next(),
            Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
        );
        assert_eq!(
            pairs.next(),
            Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
        );
        assert_eq!(
            pairs.next(),
            Some((HgPath::new(b""), HgPath::new(b"foo")))
        );
        assert_eq!(pairs.next(), None);
        assert_eq!(pairs.next(), None);
    }

    #[test]
    fn test_find_dirs_with_base_empty() {
        let mut pairs = super::find_dirs_with_base(HgPath::new(b""));
        assert_eq!(pairs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
        assert_eq!(pairs.next(), None);
        assert_eq!(pairs.next(), None);
    }

    /// Purely lexical cases: every root below is `/repo`.
    #[test]
    fn test_canonical_path() {
        let not_under_root =
            |path: &str| -> Result<PathBuf, HgPathError> {
                Err(HgPathError::NotUnderRoot {
                    path: PathBuf::from(path),
                    root: PathBuf::from("/repo"),
                })
            };
        let under_root = |path: &str| -> Result<PathBuf, HgPathError> {
            Ok(PathBuf::from(path))
        };

        // (cwd, name, expected result)
        let cases = vec![
            ("/dir", "filename", not_under_root("/dir/filename")),
            ("/", "filename", not_under_root("/filename")),
            ("/", "repo/filename", under_root("filename")),
            ("/repo", "filename", under_root("filename")),
            ("/repo/subdir", "filename", under_root("subdir/filename")),
        ];

        for (cwd, name, expected) in cases {
            let root = Path::new("/repo");
            assert_eq!(
                canonical_path(root, Path::new(cwd), Path::new(name)),
                expected
            );
        }
    }

    /// Cases that need the filesystem: the repository is reached through
    /// a symlink living outside of it.
    #[test]
    fn test_canonical_path_not_rooted() {
        use std::fs::create_dir;
        use tempfile::tempdir;

        let base_dir = tempdir().unwrap();
        let base = base_dir.path();
        let beneath_repo = base.join("a");
        let root = base.join("a/b");
        let out_of_repo = base.join("c");
        let under_repo_symlink = out_of_repo.join("d");

        create_dir(&beneath_repo).unwrap();
        create_dir(&root).unwrap();

        // TODO make portable
        std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();

        // The symlink itself designates the root.
        assert_eq!(
            canonical_path(&root, Path::new(""), out_of_repo),
            Ok(PathBuf::from(""))
        );
        // A parent of the root is not under the root.
        assert_eq!(
            canonical_path(&root, Path::new(""), &beneath_repo),
            Err(HgPathError::NotUnderRoot {
                path: beneath_repo.to_owned(),
                root: root.to_owned()
            })
        );
        // A path below the symlink is relative to the root.
        assert_eq!(
            canonical_path(&root, Path::new(""), &under_repo_symlink),
            Ok(PathBuf::from("d"))
        );
    }
}
@@ -1,231 +1,232 b''
1 1 // path_auditor.rs
2 2 //
3 3 // Copyright 2020
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 //
6 6 // This software may be used and distributed according to the terms of the
7 7 // GNU General Public License version 2 or any later version.
8 8
9 9 use crate::utils::{
10 10 files::lower_clean,
11 11 find_slice_in_slice,
12 12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
13 13 };
14 14 use std::collections::HashSet;
15 15 use std::path::{Path, PathBuf};
16 use std::sync::{Mutex, RwLock};
16 17
17 18 /// Ensures that a path is valid for use in the repository i.e. does not use
18 19 /// any banned components, does not traverse a symlink, etc.
19 20 #[derive(Debug, Default)]
20 21 pub struct PathAuditor {
21 audited: HashSet<HgPathBuf>,
22 audited_dirs: HashSet<HgPathBuf>,
22 audited: Mutex<HashSet<HgPathBuf>>,
23 audited_dirs: RwLock<HashSet<HgPathBuf>>,
23 24 root: PathBuf,
24 25 }
25 26
26 27 impl PathAuditor {
27 28 pub fn new(root: impl AsRef<Path>) -> Self {
28 29 Self {
29 30 root: root.as_ref().to_owned(),
30 31 ..Default::default()
31 32 }
32 33 }
33 34 pub fn audit_path(
34 &mut self,
35 &self,
35 36 path: impl AsRef<HgPath>,
36 37 ) -> Result<(), HgPathError> {
37 38 // TODO windows "localpath" normalization
38 39 let path = path.as_ref();
39 40 if path.is_empty() {
40 41 return Ok(());
41 42 }
42 43 // TODO case normalization
43 if self.audited.contains(path) {
44 if self.audited.lock().unwrap().contains(path) {
44 45 return Ok(());
45 46 }
46 47 // AIX ignores "/" at end of path, others raise EISDIR.
47 48 let last_byte = path.as_bytes()[path.len() - 1];
48 49 if last_byte == b'/' || last_byte == b'\\' {
49 50 return Err(HgPathError::EndsWithSlash(path.to_owned()));
50 51 }
51 52 let parts: Vec<_> = path
52 53 .as_bytes()
53 54 .split(|b| std::path::is_separator(*b as char))
54 55 .collect();
55 56
56 57 let first_component = lower_clean(parts[0]);
57 58 let first_component = first_component.as_slice();
58 59 if !path.split_drive().0.is_empty()
59 60 || (first_component == b".hg"
60 61 || first_component == b".hg."
61 62 || first_component == b"")
62 63 || parts.iter().any(|c| c == b"..")
63 64 {
64 65 return Err(HgPathError::InsideDotHg(path.to_owned()));
65 66 }
66 67
67 68 // Windows shortname aliases
68 69 for part in parts.iter() {
69 70 if part.contains(&b'~') {
70 71 let mut split = part.splitn(2, |b| *b == b'~');
71 72 let first =
72 73 split.next().unwrap().to_owned().to_ascii_uppercase();
73 74 let last = split.next().unwrap();
74 75 if last.iter().all(u8::is_ascii_digit)
75 76 && (first == b"HG" || first == b"HG8B6C")
76 77 {
77 78 return Err(HgPathError::ContainsIllegalComponent(
78 79 path.to_owned(),
79 80 ));
80 81 }
81 82 }
82 83 }
83 84 let lower_path = lower_clean(path.as_bytes());
84 85 if find_slice_in_slice(&lower_path, b".hg").is_some() {
85 86 let lower_parts: Vec<_> = path
86 87 .as_bytes()
87 88 .split(|b| std::path::is_separator(*b as char))
88 89 .collect();
89 90 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
90 91 if let Some(pos) = lower_parts[1..]
91 92 .iter()
92 93 .position(|part| part == &pattern.as_slice())
93 94 {
94 95 let base = lower_parts[..=pos]
95 96 .iter()
96 97 .fold(HgPathBuf::new(), |acc, p| {
97 98 acc.join(HgPath::new(p))
98 99 });
99 100 return Err(HgPathError::IsInsideNestedRepo {
100 101 path: path.to_owned(),
101 102 nested_repo: base,
102 103 });
103 104 }
104 105 }
105 106 }
106 107
107 108 let parts = &parts[..parts.len().saturating_sub(1)];
108 109
109 110 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
110 111 // if there's a "foo/.hg" directory. This also means we won't
111 112 // accidentally traverse a symlink into some other filesystem (which
112 113 // is potentially expensive to access).
113 114 for index in 0..parts.len() {
114 115 let prefix = &parts[..index + 1].join(&b'/');
115 116 let prefix = HgPath::new(prefix);
116 if self.audited_dirs.contains(prefix) {
117 if self.audited_dirs.read().unwrap().contains(prefix) {
117 118 continue;
118 119 }
119 120 self.check_filesystem(&prefix, &path)?;
120 self.audited_dirs.insert(prefix.to_owned());
121 self.audited_dirs.write().unwrap().insert(prefix.to_owned());
121 122 }
122 123
123 self.audited.insert(path.to_owned());
124 self.audited.lock().unwrap().insert(path.to_owned());
124 125
125 126 Ok(())
126 127 }
127 128
128 129 pub fn check_filesystem(
129 130 &self,
130 131 prefix: impl AsRef<HgPath>,
131 132 path: impl AsRef<HgPath>,
132 133 ) -> Result<(), HgPathError> {
133 134 let prefix = prefix.as_ref();
134 135 let path = path.as_ref();
135 136 let current_path = self.root.join(
136 137 hg_path_to_path_buf(prefix)
137 138 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
138 139 );
139 140 match std::fs::symlink_metadata(&current_path) {
140 141 Err(e) => {
141 142 // EINVAL can be raised as invalid path syntax under win32.
142 143 if e.kind() != std::io::ErrorKind::NotFound
143 144 && e.kind() != std::io::ErrorKind::InvalidInput
144 145 && e.raw_os_error() != Some(20)
145 146 {
146 147 // Rust does not yet have an `ErrorKind` for
147 148 // `NotADirectory` (errno 20)
148 149 // It happens if the dirstate contains `foo/bar` and
149 150 // foo is not a directory
150 151 return Err(HgPathError::NotFsCompliant(path.to_owned()));
151 152 }
152 153 }
153 154 Ok(meta) => {
154 155 if meta.file_type().is_symlink() {
155 156 return Err(HgPathError::TraversesSymbolicLink {
156 157 path: path.to_owned(),
157 158 symlink: prefix.to_owned(),
158 159 });
159 160 }
160 161 if meta.file_type().is_dir()
161 162 && current_path.join(".hg").is_dir()
162 163 {
163 164 return Err(HgPathError::IsInsideNestedRepo {
164 165 path: path.to_owned(),
165 166 nested_repo: prefix.to_owned(),
166 167 });
167 168 }
168 169 }
169 170 };
170 171
171 172 Ok(())
172 173 }
173 174
174 pub fn check(&mut self, path: impl AsRef<HgPath>) -> bool {
175 pub fn check(&self, path: impl AsRef<HgPath>) -> bool {
175 176 self.audit_path(path).is_ok()
176 177 }
177 178 }
178 179
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::files::get_path_from_bytes;
    use crate::utils::hg_path::path_to_hg_path_buf;

    /// Exercises one lexical rejection, one nested-repo rejection and one
    /// symlink traversal detected on the filesystem.
    #[test]
    fn test_path_auditor() {
        use std::fs::{create_dir, File};
        use tempfile::tempdir;

        let auditor = PathAuditor::new(get_path_from_bytes(b"/tmp"));

        // Anything below the top-level `.hg` is refused.
        let dot_hg = HgPath::new(b".hg/00changelog.i");
        assert_eq!(
            auditor.audit_path(dot_hg),
            Err(HgPathError::InsideDotHg(dot_hg.to_owned()))
        );

        // A `.hg` further down the path denotes a nested repository.
        let nested = HgPath::new(b"this/is/nested/.hg/thing.txt");
        assert_eq!(
            auditor.audit_path(nested),
            Err(HgPathError::IsInsideNestedRepo {
                path: nested.to_owned(),
                nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
            })
        );

        // Layout: `a/in_a` is a real file, `b` is a symlink to `a`.
        let base_dir = tempdir().unwrap();
        let base_dir_path = base_dir.path();
        let real_dir = base_dir_path.join("a");
        let link = base_dir_path.join("b");
        create_dir(&real_dir).unwrap();
        File::create(real_dir.join("in_a")).unwrap();

        // TODO make portable
        std::os::unix::fs::symlink(&real_dir, &link).unwrap();

        let buf = link.join("in_a").components().skip(2).collect::<PathBuf>();
        eprintln!("buf: {}", buf.display());
        let path = path_to_hg_path_buf(buf).unwrap();
        let symlink = path_to_hg_path_buf(
            link.components().skip(2).collect::<PathBuf>(),
        )
        .unwrap();
        assert_eq!(
            auditor.audit_path(&path),
            Err(HgPathError::TraversesSymbolicLink { path, symlink })
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now