##// END OF EJS Templates
rust-status: improve documentation and readability...
Raphaël Gomès -
r45672:470d306e default
parent child Browse files
Show More
@@ -1,943 +1,954 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust implementation of dirstate.status (dirstate.py).
9 9 //! It is currently missing a lot of functionality compared to the Python one
10 10 //! and will only be triggered in narrow cases.
11 11
12 12 use crate::{
13 13 dirstate::SIZE_FROM_OTHER_PARENT,
14 14 filepatterns::PatternFileWarning,
15 15 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
16 16 utils::{
17 17 files::{find_dirs, HgMetadata},
18 18 hg_path::{
19 19 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
20 20 HgPathError,
21 21 },
22 22 path_auditor::PathAuditor,
23 23 },
24 24 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
25 25 PatternError,
26 26 };
27 27 use lazy_static::lazy_static;
28 28 use micro_timer::timed;
29 29 use rayon::prelude::*;
30 30 use std::{
31 31 borrow::Cow,
32 32 collections::HashSet,
33 33 fs::{read_dir, DirEntry},
34 34 io::ErrorKind,
35 35 ops::Deref,
36 36 path::{Path, PathBuf},
37 37 };
38 38
/// Wrong type of file from a `BadMatch`.
///
/// Note: a lot of those don't exist on all platforms.
#[derive(Debug, Copy, Clone)]
pub enum BadType {
    CharacterDevice,
    BlockDevice,
    FIFO,
    Socket,
    Directory,
    Unknown,
}

/// Human-readable, lowercase name of the file type.
///
/// `Display` is implemented instead of `ToString` directly: `to_string()`
/// stays available to callers through the standard blanket implementation,
/// and the type additionally becomes usable in `format!`-style macros.
impl std::fmt::Display for BadType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            BadType::CharacterDevice => "character device",
            BadType::BlockDevice => "block device",
            BadType::FIFO => "fifo",
            BadType::Socket => "socket",
            BadType::Directory => "directory",
            BadType::Unknown => "unknown",
        })
    }
}
64 64
65 65 /// Was explicitly matched but cannot be found/accessed
66 66 #[derive(Debug, Copy, Clone)]
67 67 pub enum BadMatch {
68 68 OsError(i32),
69 69 BadType(BadType),
70 70 }
71 71
72 /// Marker enum used to dispatch new status entries into the right collections.
72 /// Enum used to dispatch new status entries into the right collections.
73 73 /// Is similar to `crate::EntryState`, but represents the transient state of
74 74 /// entries during the lifetime of a command.
75 75 #[derive(Debug, Copy, Clone)]
76 76 pub enum Dispatch {
77 77 Unsure,
78 78 Modified,
79 79 Added,
80 80 Removed,
81 81 Deleted,
82 82 Clean,
83 83 Unknown,
84 84 Ignored,
85 85 /// Empty dispatch, the file is not worth listing
86 86 None,
87 87 /// Was explicitly matched but cannot be found/accessed
88 88 Bad(BadMatch),
89 89 Directory {
90 90 /// True if the directory used to be a file in the dmap so we can say
91 91 /// that it's been removed.
92 92 was_file: bool,
93 93 },
94 94 }
95 95
96 96 type IoResult<T> = std::io::Result<T>;
97
97 98 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait, 'static>`, so add
98 99 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
99 100 type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
100 101
102 /// We have a good mix of owned (from directory traversal) and borrowed (from
103 /// the dirstate/explicit) paths, this comes up a lot.
104 type HgPathCow<'a> = Cow<'a, HgPath>;
105
106 /// A path with its computed ``Dispatch`` information
107 type DispatchedPath<'a> = (HgPathCow<'a>, Dispatch);
108
/// Returns `true` when `a` and `b` DIFFER once their sign bit is masked off.
///
/// Dates and times that are outside the 31-bit signed range are compared
/// modulo 2^31. This should prevent hg from behaving badly with very large
/// files or corrupt dates while still having a high probability of detecting
/// changes. (issue2608)
///
/// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
/// is not defined for `i32`, and there is no `As` trait. This forces the
/// caller to cast `b` as `i32`.
fn mod_compare(a: i32, b: i32) -> bool {
    // Parentheses are redundant in Rust (`&` binds tighter than `!=`) but
    // spare readers used to C precedence a double take.
    (a & i32::max_value()) != (b & i32::max_value())
}
111 119
112 120 /// Return a sorted list containing information about the entries
113 121 /// in the directory.
114 122 ///
115 123 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
116 124 fn list_directory(
117 125 path: impl AsRef<Path>,
118 126 skip_dot_hg: bool,
119 127 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
120 128 let mut results = vec![];
121 129 let entries = read_dir(path.as_ref())?;
122 130
123 131 for entry in entries {
124 132 let entry = entry?;
125 133 let filename = os_string_to_hg_path_buf(entry.file_name())?;
126 134 let file_type = entry.file_type()?;
127 135 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
128 136 return Ok(vec![]);
129 137 } else {
130 138 results.push((filename, entry))
131 139 }
132 140 }
133 141
134 142 results.sort_unstable_by_key(|e| e.0.clone());
135 143 Ok(results)
136 144 }
137 145
138 146 /// The file corresponding to the dirstate entry was found on the filesystem.
139 147 fn dispatch_found(
140 148 filename: impl AsRef<HgPath>,
141 149 entry: DirstateEntry,
142 150 metadata: HgMetadata,
143 151 copy_map: &CopyMap,
144 152 options: StatusOptions,
145 153 ) -> Dispatch {
146 154 let DirstateEntry {
147 155 state,
148 156 mode,
149 157 mtime,
150 158 size,
151 159 } = entry;
152 160
153 161 let HgMetadata {
154 162 st_mode,
155 163 st_size,
156 164 st_mtime,
157 165 ..
158 166 } = metadata;
159 167
160 168 match state {
161 169 EntryState::Normal => {
162 170 let size_changed = mod_compare(size, st_size as i32);
163 171 let mode_changed =
164 172 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
165 173 let metadata_changed = size >= 0 && (size_changed || mode_changed);
166 174 let other_parent = size == SIZE_FROM_OTHER_PARENT;
167 175
168 176 if metadata_changed
169 177 || other_parent
170 178 || copy_map.contains_key(filename.as_ref())
171 179 {
172 180 Dispatch::Modified
173 181 } else if mod_compare(mtime, st_mtime as i32)
174 182 || st_mtime == options.last_normal_time
175 183 {
176 184 // the file may have just been marked as normal and
177 185 // it may have changed in the same second without
178 186 // changing its size. This can happen if we quickly
179 187 // do multiple commits. Force lookup, so we don't
180 188 // miss such a racy file change.
181 189 Dispatch::Unsure
182 190 } else if options.list_clean {
183 191 Dispatch::Clean
184 192 } else {
185 193 Dispatch::None
186 194 }
187 195 }
188 196 EntryState::Merged => Dispatch::Modified,
189 197 EntryState::Added => Dispatch::Added,
190 198 EntryState::Removed => Dispatch::Removed,
191 199 EntryState::Unknown => Dispatch::Unknown,
192 200 }
193 201 }
194 202
195 203 /// The file corresponding to this Dirstate entry is missing.
196 204 fn dispatch_missing(state: EntryState) -> Dispatch {
197 205 match state {
198 206 // File was removed from the filesystem during commands
199 207 EntryState::Normal | EntryState::Merged | EntryState::Added => {
200 208 Dispatch::Deleted
201 209 }
202 210 // File was removed, everything is normal
203 211 EntryState::Removed => Dispatch::Removed,
204 212 // File is unknown to Mercurial, everything is normal
205 213 EntryState::Unknown => Dispatch::Unknown,
206 214 }
207 215 }
208 216
209 217 lazy_static! {
210 218 static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
211 219 let mut h = HashSet::new();
212 220 h.insert(HgPath::new(b""));
213 221 h
214 222 };
215 223 }
216 224
/// Flags and values that tune what `status` computes and reports.
#[derive(Debug, Copy, Clone)]
pub struct StatusOptions {
    /// Remember the most recent modification timeslot for status, to make
    /// sure we won't miss future size-preserving file content modifications
    /// that happen within the same timeslot.
    pub last_normal_time: i64,
    /// Whether we are on a filesystem with UNIX-like exec flags
    pub check_exec: bool,
    /// Whether unchanged files are reported as clean instead of being left
    /// out of the results
    pub list_clean: bool,
    /// Whether files unknown to the dirstate are reported
    pub list_unknown: bool,
    /// Whether ignored files are reported
    pub list_ignored: bool,
    /// Whether to collect traversed dirs for applying a callback later.
    /// Used by `hg purge` for example.
    pub collect_traversed_dirs: bool,
}
232 240
233 241 #[derive(Debug)]
234 242 pub struct DirstateStatus<'a> {
235 pub modified: Vec<Cow<'a, HgPath>>,
236 pub added: Vec<Cow<'a, HgPath>>,
237 pub removed: Vec<Cow<'a, HgPath>>,
238 pub deleted: Vec<Cow<'a, HgPath>>,
239 pub clean: Vec<Cow<'a, HgPath>>,
240 pub ignored: Vec<Cow<'a, HgPath>>,
241 pub unknown: Vec<Cow<'a, HgPath>>,
242 pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
243 pub modified: Vec<HgPathCow<'a>>,
244 pub added: Vec<HgPathCow<'a>>,
245 pub removed: Vec<HgPathCow<'a>>,
246 pub deleted: Vec<HgPathCow<'a>>,
247 pub clean: Vec<HgPathCow<'a>>,
248 pub ignored: Vec<HgPathCow<'a>>,
249 pub unknown: Vec<HgPathCow<'a>>,
250 pub bad: Vec<(HgPathCow<'a>, BadMatch)>,
243 251 /// Only filled if `collect_traversed_dirs` is `true`
244 252 pub traversed: Vec<HgPathBuf>,
245 253 }
246 254
247 255 #[derive(Debug)]
248 256 pub enum StatusError {
257 /// Generic IO error
249 258 IO(std::io::Error),
259 /// An invalid path that cannot be represented in Mercurial was found
250 260 Path(HgPathError),
261 /// An invalid "ignore" pattern was found
251 262 Pattern(PatternError),
252 263 }
253 264
254 265 pub type StatusResult<T> = Result<T, StatusError>;
255 266
256 267 impl From<PatternError> for StatusError {
257 268 fn from(e: PatternError) -> Self {
258 269 StatusError::Pattern(e)
259 270 }
260 271 }
261 272 impl From<HgPathError> for StatusError {
262 273 fn from(e: HgPathError) -> Self {
263 274 StatusError::Path(e)
264 275 }
265 276 }
266 277 impl From<std::io::Error> for StatusError {
267 278 fn from(e: std::io::Error) -> Self {
268 279 StatusError::IO(e)
269 280 }
270 281 }
271 282
272 283 impl ToString for StatusError {
273 284 fn to_string(&self) -> String {
274 285 match self {
275 286 StatusError::IO(e) => e.to_string(),
276 287 StatusError::Path(e) => e.to_string(),
277 288 StatusError::Pattern(e) => e.to_string(),
278 289 }
279 290 }
280 291 }
281 292
293 /// Gives information about which files are changed in the working directory
294 /// and how, compared to the revision we're based on
282 295 pub struct Status<'a, M: Matcher + Sync> {
283 296 dmap: &'a DirstateMap,
284 297 matcher: &'a M,
285 298 root_dir: PathBuf,
286 299 options: StatusOptions,
287 300 ignore_fn: IgnoreFnType<'a>,
288 301 }
289 302
290 303 impl<'a, M> Status<'a, M>
291 304 where
292 305 M: Matcher + Sync,
293 306 {
294 307 pub fn new(
295 308 dmap: &'a DirstateMap,
296 309 matcher: &'a M,
297 310 root_dir: PathBuf,
298 311 ignore_files: Vec<PathBuf>,
299 312 options: StatusOptions,
300 313 ) -> StatusResult<(Self, Vec<PatternFileWarning>)> {
301 314 // Needs to outlive `dir_ignore_fn` since it's captured.
302 315
303 316 let (ignore_fn, warnings): (IgnoreFnType, _) =
304 317 if options.list_ignored || options.list_unknown {
305 318 get_ignore_function(ignore_files, &root_dir)?
306 319 } else {
307 320 (Box::new(|&_| true), vec![])
308 321 };
309 322
310 323 Ok((
311 324 Self {
312 325 dmap,
313 326 matcher,
314 327 root_dir,
315 328 options,
316 329 ignore_fn,
317 330 },
318 331 warnings,
319 332 ))
320 333 }
321 334
335 /// Is the path ignored?
322 336 pub fn is_ignored(&self, path: impl AsRef<HgPath>) -> bool {
323 337 (self.ignore_fn)(path.as_ref())
324 338 }
325 339
326 340 /// Is the path or one of its ancestors ignored?
327 341 pub fn dir_ignore(&self, dir: impl AsRef<HgPath>) -> bool {
328 342 // Only involve ignore mechanism if we're listing unknowns or ignored.
329 343 if self.options.list_ignored || self.options.list_unknown {
330 344 if self.is_ignored(&dir) {
331 345 true
332 346 } else {
333 347 for p in find_dirs(dir.as_ref()) {
334 348 if self.is_ignored(p) {
335 349 return true;
336 350 }
337 351 }
338 352 false
339 353 }
340 354 } else {
341 355 true
342 356 }
343 357 }
344 358
345 /// Get stat data about the files explicitly specified by match.
359 /// Get stat data about the files explicitly specified by the matcher.
360 /// Returns a tuple of the directories that need to be traversed and the
361 /// files with their corresponding `Dispatch`.
346 362 /// TODO subrepos
347 363 #[timed]
348 364 pub fn walk_explicit(
349 365 &self,
350 366 traversed_sender: crossbeam::Sender<HgPathBuf>,
351 ) -> (
352 Vec<(Cow<'a, HgPath>, Dispatch)>,
353 Vec<(Cow<'a, HgPath>, Dispatch)>,
354 ) {
367 ) -> (Vec<DispatchedPath<'a>>, Vec<DispatchedPath<'a>>) {
355 368 self.matcher
356 369 .file_set()
357 370 .unwrap_or(&DEFAULT_WORK)
358 371 .par_iter()
359 372 .map(|&filename| -> Option<IoResult<_>> {
360 373 // TODO normalization
361 374 let normalized = filename;
362 375
363 376 let buf = match hg_path_to_path_buf(normalized) {
364 377 Ok(x) => x,
365 378 Err(e) => return Some(Err(e.into())),
366 379 };
367 380 let target = self.root_dir.join(buf);
368 381 let st = target.symlink_metadata();
369 382 let in_dmap = self.dmap.get(normalized);
370 383 match st {
371 384 Ok(meta) => {
372 385 let file_type = meta.file_type();
373 386 return if file_type.is_file() || file_type.is_symlink()
374 387 {
375 388 if let Some(entry) = in_dmap {
376 389 return Some(Ok((
377 390 Cow::Borrowed(normalized),
378 391 dispatch_found(
379 392 &normalized,
380 393 *entry,
381 394 HgMetadata::from_metadata(meta),
382 395 &self.dmap.copy_map,
383 396 self.options,
384 397 ),
385 398 )));
386 399 }
387 400 Some(Ok((
388 401 Cow::Borrowed(normalized),
389 402 Dispatch::Unknown,
390 403 )))
391 404 } else if file_type.is_dir() {
392 405 if self.options.collect_traversed_dirs {
393 406 traversed_sender
394 407 .send(normalized.to_owned())
395 408 .expect("receiver should outlive sender");
396 409 }
397 410 Some(Ok((
398 411 Cow::Borrowed(normalized),
399 412 Dispatch::Directory {
400 413 was_file: in_dmap.is_some(),
401 414 },
402 415 )))
403 416 } else {
404 417 Some(Ok((
405 418 Cow::Borrowed(normalized),
406 419 Dispatch::Bad(BadMatch::BadType(
407 420 // TODO do more than unknown
408 421 // Support for all `BadType` variant
409 422 // varies greatly between platforms.
410 423 // So far, no tests check the type and
411 424 // this should be good enough for most
412 425 // users.
413 426 BadType::Unknown,
414 427 )),
415 428 )))
416 429 };
417 430 }
418 431 Err(_) => {
419 432 if let Some(entry) = in_dmap {
420 433 return Some(Ok((
421 434 Cow::Borrowed(normalized),
422 435 dispatch_missing(entry.state),
423 436 )));
424 437 }
425 438 }
426 439 };
427 440 None
428 441 })
429 442 .flatten()
430 443 .filter_map(Result::ok)
431 444 .partition(|(_, dispatch)| match dispatch {
432 445 Dispatch::Directory { .. } => true,
433 446 _ => false,
434 447 })
435 448 }
436 449
437 450 /// Walk the working directory recursively to look for changes compared to
438 451 /// the current `DirstateMap`.
439 452 ///
440 453 /// This takes a mutable reference to the results to account for the
441 454 /// `extend` in timings
442 455 #[timed]
443 456 pub fn traverse(
444 457 &self,
445 458 path: impl AsRef<HgPath>,
446 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
447 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
459 old_results: &FastHashMap<HgPathCow<'a>, Dispatch>,
460 results: &mut Vec<DispatchedPath<'a>>,
448 461 traversed_sender: crossbeam::Sender<HgPathBuf>,
449 462 ) -> IoResult<()> {
450 463 // The traversal is done in parallel, so use a channel to gather
451 464 // entries. `crossbeam::Sender` is `Sync`, while `mpsc::Sender`
452 465 // is not.
453 466 let (files_transmitter, files_receiver) =
454 467 crossbeam::channel::unbounded();
455 468
456 469 self.traverse_dir(
457 470 &files_transmitter,
458 471 path,
459 472 &old_results,
460 473 traversed_sender,
461 474 )?;
462 475
463 476 // Disconnect the channel so the receiver stops waiting
464 477 drop(files_transmitter);
465 478
466 479 // TODO don't collect. Find a way of replicating the behavior of
467 480 // `itertools::process_results`, but for `rayon::ParallelIterator`
468 481 let new_results: IoResult<Vec<(Cow<HgPath>, Dispatch)>> =
469 482 files_receiver
470 483 .into_iter()
471 484 .map(|item| {
472 485 let (f, d) = item?;
473 486 Ok((Cow::Owned(f), d))
474 487 })
475 488 .collect();
476 489
477 490 results.par_extend(new_results?);
478 491
479 492 Ok(())
480 493 }
481 494
482 495 /// Dispatch a single entry (file, folder, symlink...) found during
483 496 /// `traverse`. If the entry is a folder that needs to be traversed, it
484 497 /// will be handled in a separate thread.
485 498 fn handle_traversed_entry<'b>(
486 499 &'a self,
487 500 scope: &rayon::Scope<'b>,
488 501 files_sender: &'b crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
489 502 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
490 503 filename: HgPathBuf,
491 504 dir_entry: DirEntry,
492 505 traversed_sender: crossbeam::Sender<HgPathBuf>,
493 506 ) -> IoResult<()>
494 507 where
495 508 'a: 'b,
496 509 {
497 510 let file_type = dir_entry.file_type()?;
498 511 let entry_option = self.dmap.get(&filename);
499 512
500 513 if filename.as_bytes() == b".hg" {
501 514 // Could be a directory or a symlink
502 515 return Ok(());
503 516 }
504 517
505 518 if file_type.is_dir() {
506 519 self.handle_traversed_dir(
507 520 scope,
508 521 files_sender,
509 522 old_results,
510 523 entry_option,
511 524 filename,
512 525 traversed_sender,
513 526 );
514 527 } else if file_type.is_file() || file_type.is_symlink() {
515 528 if let Some(entry) = entry_option {
516 529 if self.matcher.matches_everything()
517 530 || self.matcher.matches(&filename)
518 531 {
519 532 let metadata = dir_entry.metadata()?;
520 533 files_sender
521 534 .send(Ok((
522 535 filename.to_owned(),
523 536 dispatch_found(
524 537 &filename,
525 538 *entry,
526 539 HgMetadata::from_metadata(metadata),
527 540 &self.dmap.copy_map,
528 541 self.options,
529 542 ),
530 543 )))
531 544 .unwrap();
532 545 }
533 546 } else if (self.matcher.matches_everything()
534 547 || self.matcher.matches(&filename))
535 548 && !self.is_ignored(&filename)
536 549 {
537 550 if (self.options.list_ignored
538 551 || self.matcher.exact_match(&filename))
539 552 && self.dir_ignore(&filename)
540 553 {
541 554 if self.options.list_ignored {
542 555 files_sender
543 556 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
544 557 .unwrap();
545 558 }
546 559 } else if self.options.list_unknown {
547 560 files_sender
548 561 .send(Ok((filename.to_owned(), Dispatch::Unknown)))
549 562 .unwrap();
550 563 }
551 564 } else if self.is_ignored(&filename) && self.options.list_ignored {
552 565 files_sender
553 566 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
554 567 .unwrap();
555 568 }
556 569 } else if let Some(entry) = entry_option {
557 570 // Used to be a file or a folder, now something else.
558 571 if self.matcher.matches_everything()
559 572 || self.matcher.matches(&filename)
560 573 {
561 574 files_sender
562 575 .send(Ok((
563 576 filename.to_owned(),
564 577 dispatch_missing(entry.state),
565 578 )))
566 579 .unwrap();
567 580 }
568 581 }
569 582
570 583 Ok(())
571 584 }
572 585
573 586 /// A directory was found in the filesystem and needs to be traversed
574 587 fn handle_traversed_dir<'b>(
575 588 &'a self,
576 589 scope: &rayon::Scope<'b>,
577 590 files_sender: &'b crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
578 591 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
579 592 entry_option: Option<&'a DirstateEntry>,
580 593 directory: HgPathBuf,
581 594 traversed_sender: crossbeam::Sender<HgPathBuf>,
582 595 ) where
583 596 'a: 'b,
584 597 {
585 598 scope.spawn(move |_| {
586 599 // Nested `if` until `rust-lang/rust#53668` is stable
587 600 if let Some(entry) = entry_option {
588 601 // Used to be a file, is now a folder
589 602 if self.matcher.matches_everything()
590 603 || self.matcher.matches(&directory)
591 604 {
592 605 files_sender
593 606 .send(Ok((
594 607 directory.to_owned(),
595 608 dispatch_missing(entry.state),
596 609 )))
597 610 .unwrap();
598 611 }
599 612 }
600 613 // Do we need to traverse it?
601 614 if !self.is_ignored(&directory) || self.options.list_ignored {
602 615 self.traverse_dir(
603 616 files_sender,
604 617 directory,
605 618 &old_results,
606 619 traversed_sender,
607 620 )
608 621 .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
609 622 }
610 623 });
611 624 }
612 625
613 626 /// Decides whether the directory needs to be listed, and if so handles the
614 627 /// entries in a separate thread.
615 628 fn traverse_dir(
616 629 &self,
617 630 files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
618 631 directory: impl AsRef<HgPath>,
619 632 old_results: &FastHashMap<Cow<HgPath>, Dispatch>,
620 633 traversed_sender: crossbeam::Sender<HgPathBuf>,
621 634 ) -> IoResult<()> {
622 635 let directory = directory.as_ref();
623 636
624 637 if self.options.collect_traversed_dirs {
625 638 traversed_sender
626 639 .send(directory.to_owned())
627 640 .expect("receiver should outlive sender");
628 641 }
629 642
630 643 let visit_entries = match self.matcher.visit_children_set(directory) {
631 644 VisitChildrenSet::Empty => return Ok(()),
632 645 VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
633 646 VisitChildrenSet::Set(set) => Some(set),
634 647 };
635 648 let buf = hg_path_to_path_buf(directory)?;
636 649 let dir_path = self.root_dir.join(buf);
637 650
638 651 let skip_dot_hg = !directory.as_bytes().is_empty();
639 652 let entries = match list_directory(dir_path, skip_dot_hg) {
640 Err(e) => match e.kind() {
653 Err(e) => {
654 return match e.kind() {
641 655 ErrorKind::NotFound | ErrorKind::PermissionDenied => {
642 656 files_sender
643 657 .send(Ok((
644 658 directory.to_owned(),
645 659 Dispatch::Bad(BadMatch::OsError(
646 660 // Unwrapping here is OK because the error
647 661 // always is a
648 662 // real os error
649 663 e.raw_os_error().unwrap(),
650 664 )),
651 665 )))
652 .unwrap();
653 return Ok(());
666 .expect("receiver should outlive sender");
667 Ok(())
654 668 }
655 _ => return Err(e),
656 },
669 _ => Err(e),
670 };
671 }
657 672 Ok(entries) => entries,
658 673 };
659 674
660 675 rayon::scope(|scope| -> IoResult<()> {
661 676 for (filename, dir_entry) in entries {
662 677 if let Some(ref set) = visit_entries {
663 678 if !set.contains(filename.deref()) {
664 679 continue;
665 680 }
666 681 }
667 682 // TODO normalize
668 683 let filename = if directory.is_empty() {
669 684 filename.to_owned()
670 685 } else {
671 686 directory.join(&filename)
672 687 };
673 688
674 689 if !old_results.contains_key(filename.deref()) {
675 690 self.handle_traversed_entry(
676 691 scope,
677 692 files_sender,
678 693 old_results,
679 694 filename,
680 695 dir_entry,
681 696 traversed_sender.clone(),
682 697 )?;
683 698 }
684 699 }
685 700 Ok(())
686 701 })
687 702 }
688 703
704 /// Checks all files that are in the dirstate but were not found during the
705 /// working directory traversal. This means that the rest must
706 /// be either ignored, under a symlink or under a new nested repo.
707 ///
689 708 /// This takes a mutable reference to the results to account for the
690 709 /// `extend` in timings
691 710 #[timed]
692 711 fn handle_unknowns(
693 712 &self,
694 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
713 results: &mut Vec<DispatchedPath<'a>>,
695 714 ) -> IoResult<()> {
696 715 let to_visit: Vec<(&HgPath, &DirstateEntry)> =
697 716 if results.is_empty() && self.matcher.matches_everything() {
698 717 self.dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
699 718 } else {
700 719 // Only convert to a hashmap if needed.
701 720 let old_results: FastHashMap<_, _> =
702 721 results.iter().cloned().collect();
703 722 self.dmap
704 723 .iter()
705 724 .filter_map(move |(f, e)| {
706 725 if !old_results.contains_key(f.deref())
707 726 && self.matcher.matches(f)
708 727 {
709 728 Some((f.deref(), e))
710 729 } else {
711 730 None
712 731 }
713 732 })
714 733 .collect()
715 734 };
716 735
717 // We walked all dirs under the roots that weren't ignored, and
718 // everything that matched was stat'ed and is already in results.
719 // The rest must thus be ignored or under a symlink.
720 736 let path_auditor = PathAuditor::new(&self.root_dir);
721 737
722 738 // TODO don't collect. Find a way of replicating the behavior of
723 739 // `itertools::process_results`, but for `rayon::ParallelIterator`
724 740 let new_results: IoResult<Vec<_>> = to_visit
725 741 .into_par_iter()
726 742 .filter_map(|(filename, entry)| -> Option<IoResult<_>> {
727 743 // Report ignored items in the dmap as long as they are not
728 744 // under a symlink directory.
729 745 if path_auditor.check(filename) {
730 746 // TODO normalize for case-insensitive filesystems
731 747 let buf = match hg_path_to_path_buf(filename) {
732 748 Ok(x) => x,
733 749 Err(e) => return Some(Err(e.into())),
734 750 };
735 751 Some(Ok((
736 752 Cow::Borrowed(filename),
737 753 match self.root_dir.join(&buf).symlink_metadata() {
738 754 // File was just ignored, no links, and exists
739 755 Ok(meta) => {
740 756 let metadata = HgMetadata::from_metadata(meta);
741 757 dispatch_found(
742 758 filename,
743 759 *entry,
744 760 metadata,
745 761 &self.dmap.copy_map,
746 762 self.options,
747 763 )
748 764 }
749 765 // File doesn't exist
750 766 Err(_) => dispatch_missing(entry.state),
751 767 },
752 768 )))
753 769 } else {
754 770 // It's either missing or under a symlink directory which
755 771 // we, in this case, report as missing.
756 772 Some(Ok((
757 773 Cow::Borrowed(filename),
758 774 dispatch_missing(entry.state),
759 775 )))
760 776 }
761 777 })
762 778 .collect();
763 779
764 780 results.par_extend(new_results?);
765 781
766 782 Ok(())
767 783 }
768 784
785 /// Add the files in the dirstate to the results.
786 ///
769 787 /// This takes a mutable reference to the results to account for the
770 788 /// `extend` in timings
771 789 #[timed]
772 fn extend_from_dmap(
773 &self,
774 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
775 ) {
790 fn extend_from_dmap(&self, results: &mut Vec<DispatchedPath<'a>>) {
776 791 results.par_extend(self.dmap.par_iter().flat_map(
777 792 move |(filename, entry)| {
778 793 let filename: &HgPath = filename;
779 794 let filename_as_path = hg_path_to_path_buf(filename)?;
780 795 let meta =
781 796 self.root_dir.join(filename_as_path).symlink_metadata();
782 797
783 798 match meta {
784 799 Ok(ref m)
785 800 if !(m.file_type().is_file()
786 801 || m.file_type().is_symlink()) =>
787 802 {
788 803 Ok((
789 804 Cow::Borrowed(filename),
790 805 dispatch_missing(entry.state),
791 806 ))
792 807 }
793 808 Ok(m) => Ok((
794 809 Cow::Borrowed(filename),
795 810 dispatch_found(
796 811 filename,
797 812 *entry,
798 813 HgMetadata::from_metadata(m),
799 814 &self.dmap.copy_map,
800 815 self.options,
801 816 ),
802 817 )),
803 818 Err(ref e)
804 819 if e.kind() == ErrorKind::NotFound
805 820 || e.raw_os_error() == Some(20) =>
806 821 {
807 822 // Rust does not yet have an `ErrorKind` for
808 823 // `NotADirectory` (errno 20)
809 824 // It happens if the dirstate contains `foo/bar`
810 825 // and foo is not a
811 826 // directory
812 827 Ok((
813 828 Cow::Borrowed(filename),
814 829 dispatch_missing(entry.state),
815 830 ))
816 831 }
817 832 Err(e) => Err(e),
818 833 }
819 834 },
820 835 ));
821 836 }
822 837 }
823 838
824 839 #[timed]
825 840 fn build_response<'a>(
826 results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
841 results: impl IntoIterator<Item = DispatchedPath<'a>>,
827 842 traversed: Vec<HgPathBuf>,
828 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
843 ) -> (Vec<HgPathCow<'a>>, DirstateStatus<'a>) {
829 844 let mut lookup = vec![];
830 845 let mut modified = vec![];
831 846 let mut added = vec![];
832 847 let mut removed = vec![];
833 848 let mut deleted = vec![];
834 849 let mut clean = vec![];
835 850 let mut ignored = vec![];
836 851 let mut unknown = vec![];
837 852 let mut bad = vec![];
838 853
839 854 for (filename, dispatch) in results.into_iter() {
840 855 match dispatch {
841 856 Dispatch::Unknown => unknown.push(filename),
842 857 Dispatch::Unsure => lookup.push(filename),
843 858 Dispatch::Modified => modified.push(filename),
844 859 Dispatch::Added => added.push(filename),
845 860 Dispatch::Removed => removed.push(filename),
846 861 Dispatch::Deleted => deleted.push(filename),
847 862 Dispatch::Clean => clean.push(filename),
848 863 Dispatch::Ignored => ignored.push(filename),
849 864 Dispatch::None => {}
850 865 Dispatch::Bad(reason) => bad.push((filename, reason)),
851 866 Dispatch::Directory { .. } => {}
852 867 }
853 868 }
854 869
855 870 (
856 871 lookup,
857 872 DirstateStatus {
858 873 modified,
859 874 added,
860 875 removed,
861 876 deleted,
862 877 clean,
863 878 ignored,
864 879 unknown,
865 880 bad,
866 881 traversed,
867 882 },
868 883 )
869 884 }
870 885
871 886 /// Get the status of files in the working directory.
872 887 ///
873 888 /// This is the current entry-point for `hg-core` and is realistically unusable
874 889 /// outside of a Python context because its arguments need to provide a lot of
875 890 /// information that will not be necessary in the future.
876 891 #[timed]
877 892 pub fn status<'a>(
878 893 dmap: &'a DirstateMap,
879 894 matcher: &'a (impl Matcher + Sync),
880 895 root_dir: PathBuf,
881 896 ignore_files: Vec<PathBuf>,
882 897 options: StatusOptions,
883 898 ) -> StatusResult<(
884 (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>),
899 (Vec<HgPathCow<'a>>, DirstateStatus<'a>),
885 900 Vec<PatternFileWarning>,
886 901 )> {
887 902 let (traversed_sender, traversed_receiver) =
888 903 crossbeam::channel::unbounded();
889 904 let (st, warnings) =
890 905 Status::new(dmap, matcher, root_dir, ignore_files, options)?;
891 906
892 907 // Step 1: check the files explicitly mentioned by the user
893 908 let (work, mut results) = st.walk_explicit(traversed_sender.clone());
894 909
895 910 if !work.is_empty() {
896 911 // Hashmaps are quite a bit slower to build than vecs, so only build it
897 912 // if needed.
898 913 let old_results = results.iter().cloned().collect();
899 914
900 915 // Step 2: recursively check the working directory for changes if
901 916 // needed
902 917 for (dir, dispatch) in work {
903 918 match dispatch {
904 919 Dispatch::Directory { was_file } => {
905 920 if was_file {
906 921 results.push((dir.to_owned(), Dispatch::Removed));
907 922 }
908 923 if options.list_ignored
909 924 || options.list_unknown && !st.dir_ignore(&dir)
910 925 {
911 926 st.traverse(
912 927 &dir,
913 928 &old_results,
914 929 &mut results,
915 930 traversed_sender.clone(),
916 931 )?;
917 932 }
918 933 }
919 934 _ => unreachable!("There can only be directories in `work`"),
920 935 }
921 936 }
922 937 }
923 938
924 939 if !matcher.is_exact() {
925 // Step 3: Check the remaining files from the dmap.
926 // If a dmap file is not in results yet, it was either
927 // a) not matched b) ignored, c) missing, or d) under a
928 // symlink directory.
929
930 940 if options.list_unknown {
931 941 st.handle_unknowns(&mut results)?;
932 942 } else {
933 // We may not have walked the full directory tree above, so stat
934 // and check everything we missed.
943 // TODO this is incorrect, see issue6335
944 // This requires a fix in both Python and Rust that can happen
945 // with other pending changes to `status`.
935 946 st.extend_from_dmap(&mut results);
936 947 }
937 948 }
938 949
939 950 drop(traversed_sender);
940 951 let traversed = traversed_receiver.into_iter().collect();
941 952
942 953 Ok((build_response(results, traversed), warnings))
943 954 }
General Comments 0
You need to be logged in to leave comments. Login now