##// END OF EJS Templates
rust-status: collect traversed directories if required...
Raphaël Gomès -
r45353:c802ec4f default
parent child Browse files
Show More
@@ -1,915 +1,957 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust implementation of dirstate.status (dirstate.py).
9 9 //! It is currently missing a lot of functionality compared to the Python one
10 10 //! and will only be triggered in narrow cases.
11 11
12 12 use crate::{
13 13 dirstate::SIZE_FROM_OTHER_PARENT,
14 14 filepatterns::PatternFileWarning,
15 15 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
16 16 utils::{
17 17 files::{find_dirs, HgMetadata},
18 18 hg_path::{
19 19 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
20 20 HgPathError,
21 21 },
22 22 path_auditor::PathAuditor,
23 23 },
24 24 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
25 25 PatternError,
26 26 };
27 27 use lazy_static::lazy_static;
28 28 use micro_timer::timed;
29 29 use rayon::prelude::*;
30 30 use std::{
31 31 borrow::Cow,
32 32 collections::HashSet,
33 33 fs::{read_dir, DirEntry},
34 34 io::ErrorKind,
35 35 ops::Deref,
36 36 path::{Path, PathBuf},
37 37 };
38 38
/// Wrong type of file from a `BadMatch`.
///
/// Note: a lot of those don't exist on all platforms.
#[derive(Debug, Copy, Clone)]
pub enum BadType {
    CharacterDevice,
    BlockDevice,
    FIFO,
    Socket,
    Directory,
    Unknown,
}

/// Human-readable name of the file type.
///
/// `Display` is implemented rather than `ToString` directly: the standard
/// library's blanket `impl<T: Display> ToString for T` keeps
/// `bad_type.to_string()` working for existing callers, and the type can
/// additionally be used with `format!`/`write!`.
impl std::fmt::Display for BadType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            BadType::CharacterDevice => "character device",
            BadType::BlockDevice => "block device",
            BadType::FIFO => "fifo",
            BadType::Socket => "socket",
            BadType::Directory => "directory",
            BadType::Unknown => "unknown",
        })
    }
}
64 64
65 65 /// Was explicitly matched but cannot be found/accessed
66 66 #[derive(Debug, Copy, Clone)]
67 67 pub enum BadMatch {
68 68 OsError(i32),
69 69 BadType(BadType),
70 70 }
71 71
72 72 /// Marker enum used to dispatch new status entries into the right collections.
73 73 /// Is similar to `crate::EntryState`, but represents the transient state of
74 74 /// entries during the lifetime of a command.
75 75 #[derive(Debug, Copy, Clone)]
76 76 enum Dispatch {
77 77 Unsure,
78 78 Modified,
79 79 Added,
80 80 Removed,
81 81 Deleted,
82 82 Clean,
83 83 Unknown,
84 84 Ignored,
85 85 /// Empty dispatch, the file is not worth listing
86 86 None,
87 87 /// Was explicitly matched but cannot be found/accessed
88 88 Bad(BadMatch),
89 89 Directory {
90 90 /// True if the directory used to be a file in the dmap so we can say
91 91 /// that it's been removed.
92 92 was_file: bool,
93 93 },
94 94 }
95 95
96 96 type IoResult<T> = std::io::Result<T>;
97 97 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait, 'static>`, so add
98 98 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
99 99 type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
100 100
/// Returns `true` if `a` and `b` differ once both are reduced to their low
/// 31 bits.
///
/// Dates and times that are outside the 31-bit signed range are compared
/// modulo 2^31. This should prevent hg from behaving badly with very large
/// files or corrupt dates while still having a high probability of detecting
/// changes. (issue2608)
///
/// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
/// is not defined for `i32`, and there is no `As` trait. This forces the
/// caller to cast `b` as `i32`.
fn mod_compare(a: i32, b: i32) -> bool {
    // Keeps every bit except the sign bit, i.e. the value modulo 2^31.
    const RANGE_MASK: i32 = 0x7fff_ffff;
    // The masked values differ exactly when one of the low 31 bits differs.
    ((a ^ b) & RANGE_MASK) != 0
}
111 111
112 112 /// Return a sorted list containing information about the entries
113 113 /// in the directory.
114 114 ///
115 115 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
116 116 fn list_directory(
117 117 path: impl AsRef<Path>,
118 118 skip_dot_hg: bool,
119 119 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
120 120 let mut results = vec![];
121 121 let entries = read_dir(path.as_ref())?;
122 122
123 123 for entry in entries {
124 124 let entry = entry?;
125 125 let filename = os_string_to_hg_path_buf(entry.file_name())?;
126 126 let file_type = entry.file_type()?;
127 127 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
128 128 return Ok(vec![]);
129 129 } else {
130 130 results.push((HgPathBuf::from(filename), entry))
131 131 }
132 132 }
133 133
134 134 results.sort_unstable_by_key(|e| e.0.clone());
135 135 Ok(results)
136 136 }
137 137
138 138 /// The file corresponding to the dirstate entry was found on the filesystem.
139 139 fn dispatch_found(
140 140 filename: impl AsRef<HgPath>,
141 141 entry: DirstateEntry,
142 142 metadata: HgMetadata,
143 143 copy_map: &CopyMap,
144 144 options: StatusOptions,
145 145 ) -> Dispatch {
146 146 let DirstateEntry {
147 147 state,
148 148 mode,
149 149 mtime,
150 150 size,
151 151 } = entry;
152 152
153 153 let HgMetadata {
154 154 st_mode,
155 155 st_size,
156 156 st_mtime,
157 157 ..
158 158 } = metadata;
159 159
160 160 match state {
161 161 EntryState::Normal => {
162 162 let size_changed = mod_compare(size, st_size as i32);
163 163 let mode_changed =
164 164 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
165 165 let metadata_changed = size >= 0 && (size_changed || mode_changed);
166 166 let other_parent = size == SIZE_FROM_OTHER_PARENT;
167 167 if metadata_changed
168 168 || other_parent
169 169 || copy_map.contains_key(filename.as_ref())
170 170 {
171 171 Dispatch::Modified
172 172 } else if mod_compare(mtime, st_mtime as i32) {
173 173 Dispatch::Unsure
174 174 } else if st_mtime == options.last_normal_time {
175 175 // the file may have just been marked as normal and
176 176 // it may have changed in the same second without
177 177 // changing its size. This can happen if we quickly
178 178 // do multiple commits. Force lookup, so we don't
179 179 // miss such a racy file change.
180 180 Dispatch::Unsure
181 181 } else if options.list_clean {
182 182 Dispatch::Clean
183 183 } else {
184 184 Dispatch::None
185 185 }
186 186 }
187 187 EntryState::Merged => Dispatch::Modified,
188 188 EntryState::Added => Dispatch::Added,
189 189 EntryState::Removed => Dispatch::Removed,
190 190 EntryState::Unknown => Dispatch::Unknown,
191 191 }
192 192 }
193 193
194 194 /// The file corresponding to this Dirstate entry is missing.
195 195 fn dispatch_missing(state: EntryState) -> Dispatch {
196 196 match state {
197 197 // File was removed from the filesystem during commands
198 198 EntryState::Normal | EntryState::Merged | EntryState::Added => {
199 199 Dispatch::Deleted
200 200 }
201 201 // File was removed, everything is normal
202 202 EntryState::Removed => Dispatch::Removed,
203 203 // File is unknown to Mercurial, everything is normal
204 204 EntryState::Unknown => Dispatch::Unknown,
205 205 }
206 206 }
207 207
208 208 lazy_static! {
209 209 static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
210 210 let mut h = HashSet::new();
211 211 h.insert(HgPath::new(b""));
212 212 h
213 213 };
214 214 }
215 215
216 216 /// Get stat data about the files explicitly specified by match.
217 217 /// TODO subrepos
218 218 #[timed]
219 219 fn walk_explicit<'a>(
220 220 files: Option<&'a HashSet<&HgPath>>,
221 221 dmap: &'a DirstateMap,
222 222 root_dir: impl AsRef<Path> + Sync + Send + 'a,
223 223 options: StatusOptions,
224 traversed_sender: crossbeam::Sender<HgPathBuf>,
224 225 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
225 226 files
226 227 .unwrap_or(&DEFAULT_WORK)
227 228 .par_iter()
228 229 .map(move |filename| {
229 230 // TODO normalization
230 231 let normalized = filename.as_ref();
231 232
232 233 let buf = match hg_path_to_path_buf(normalized) {
233 234 Ok(x) => x,
234 235 Err(e) => return Some(Err(e.into())),
235 236 };
236 237 let target = root_dir.as_ref().join(buf);
237 238 let st = target.symlink_metadata();
238 239 let in_dmap = dmap.get(normalized);
239 240 match st {
240 241 Ok(meta) => {
241 242 let file_type = meta.file_type();
242 243 return if file_type.is_file() || file_type.is_symlink() {
243 244 if let Some(entry) = in_dmap {
244 245 return Some(Ok((
245 246 normalized,
246 247 dispatch_found(
247 248 &normalized,
248 249 *entry,
249 250 HgMetadata::from_metadata(meta),
250 251 &dmap.copy_map,
251 252 options,
252 253 ),
253 254 )));
254 255 }
255 256 Some(Ok((normalized, Dispatch::Unknown)))
256 257 } else {
257 258 if file_type.is_dir() {
259 if options.collect_traversed_dirs {
260 // The receiver always outlives the sender,
261 // so unwrap.
262 traversed_sender
263 .send(normalized.to_owned())
264 .unwrap()
265 }
258 266 Some(Ok((
259 267 normalized,
260 268 Dispatch::Directory {
261 269 was_file: in_dmap.is_some(),
262 270 },
263 271 )))
264 272 } else {
265 273 Some(Ok((
266 274 normalized,
267 275 Dispatch::Bad(BadMatch::BadType(
268 276 // TODO do more than unknown
269 277 // Support for all `BadType` variant
270 278 // varies greatly between platforms.
271 279 // So far, no tests check the type and
272 280 // this should be good enough for most
273 281 // users.
274 282 BadType::Unknown,
275 283 )),
276 284 )))
277 285 }
278 286 };
279 287 }
280 288 Err(_) => {
281 289 if let Some(entry) = in_dmap {
282 290 return Some(Ok((
283 291 normalized,
284 292 dispatch_missing(entry.state),
285 293 )));
286 294 }
287 295 }
288 296 };
289 297 None
290 298 })
291 299 .flatten()
292 300 }
293 301
/// Options that tune what `status` compares and reports.
#[derive(Debug, Copy, Clone)]
pub struct StatusOptions {
    /// Remember the most recent modification timeslot for status, to make
    /// sure we won't miss future size-preserving file content modifications
    /// that happen within the same timeslot.
    pub last_normal_time: i64,
    /// Whether we are on a filesystem with UNIX-like exec flags
    pub check_exec: bool,
    /// Whether files that are found unchanged are reported as clean.
    pub list_clean: bool,
    /// Whether files that are neither tracked nor ignored are reported.
    pub list_unknown: bool,
    /// Whether ignored files are reported.
    pub list_ignored: bool,
    /// Whether to collect traversed dirs for applying a callback later.
    /// Used by `hg purge` for example.
    pub collect_traversed_dirs: bool,
}
306 317
307 318 /// Dispatch a single entry (file, folder, symlink...) found during `traverse`.
308 319 /// If the entry is a folder that needs to be traversed, it will be handled
309 320 /// in a separate thread.
310 321 fn handle_traversed_entry<'a>(
311 322 scope: &rayon::Scope<'a>,
312 323 files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
313 324 matcher: &'a (impl Matcher + Sync),
314 325 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
315 326 dmap: &'a DirstateMap,
316 327 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
317 328 ignore_fn: &'a IgnoreFnType,
318 329 dir_ignore_fn: &'a IgnoreFnType,
319 330 options: StatusOptions,
320 331 filename: HgPathBuf,
321 332 dir_entry: DirEntry,
333 traversed_sender: crossbeam::Sender<HgPathBuf>,
322 334 ) -> IoResult<()> {
323 335 let file_type = dir_entry.file_type()?;
324 336 let entry_option = dmap.get(&filename);
325 337
326 338 if filename.as_bytes() == b".hg" {
327 339 // Could be a directory or a symlink
328 340 return Ok(());
329 341 }
330 342
331 343 if file_type.is_dir() {
332 344 handle_traversed_dir(
333 345 scope,
334 346 files_sender,
335 347 matcher,
336 348 root_dir,
337 349 dmap,
338 350 old_results,
339 351 ignore_fn,
340 352 dir_ignore_fn,
341 353 options,
342 354 entry_option,
343 355 filename,
356 traversed_sender,
344 357 );
345 358 } else if file_type.is_file() || file_type.is_symlink() {
346 359 if let Some(entry) = entry_option {
347 360 if matcher.matches_everything() || matcher.matches(&filename) {
348 361 let metadata = dir_entry.metadata()?;
349 362 files_sender
350 363 .send(Ok((
351 364 filename.to_owned(),
352 365 dispatch_found(
353 366 &filename,
354 367 *entry,
355 368 HgMetadata::from_metadata(metadata),
356 369 &dmap.copy_map,
357 370 options,
358 371 ),
359 372 )))
360 373 .unwrap();
361 374 }
362 375 } else if (matcher.matches_everything() || matcher.matches(&filename))
363 376 && !ignore_fn(&filename)
364 377 {
365 378 if (options.list_ignored || matcher.exact_match(&filename))
366 379 && dir_ignore_fn(&filename)
367 380 {
368 381 if options.list_ignored {
369 382 files_sender
370 383 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
371 384 .unwrap();
372 385 }
373 386 } else {
374 387 if options.list_unknown {
375 388 files_sender
376 389 .send(Ok((filename.to_owned(), Dispatch::Unknown)))
377 390 .unwrap();
378 391 }
379 392 }
380 393 } else if ignore_fn(&filename) && options.list_ignored {
381 394 files_sender
382 395 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
383 396 .unwrap();
384 397 }
385 398 } else if let Some(entry) = entry_option {
386 399 // Used to be a file or a folder, now something else.
387 400 if matcher.matches_everything() || matcher.matches(&filename) {
388 401 files_sender
389 402 .send(Ok((filename.to_owned(), dispatch_missing(entry.state))))
390 403 .unwrap();
391 404 }
392 405 }
393 406
394 407 Ok(())
395 408 }
396 409
397 410 /// A directory was found in the filesystem and needs to be traversed
398 411 fn handle_traversed_dir<'a>(
399 412 scope: &rayon::Scope<'a>,
400 413 files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
401 414 matcher: &'a (impl Matcher + Sync),
402 415 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
403 416 dmap: &'a DirstateMap,
404 417 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
405 418 ignore_fn: &'a IgnoreFnType,
406 419 dir_ignore_fn: &'a IgnoreFnType,
407 420 options: StatusOptions,
408 421 entry_option: Option<&'a DirstateEntry>,
409 422 directory: HgPathBuf,
423 traversed_sender: crossbeam::Sender<HgPathBuf>,
410 424 ) {
411 425 scope.spawn(move |_| {
412 426 // Nested `if` until `rust-lang/rust#53668` is stable
413 427 if let Some(entry) = entry_option {
414 428 // Used to be a file, is now a folder
415 429 if matcher.matches_everything() || matcher.matches(&directory) {
416 430 files_sender
417 431 .send(Ok((
418 432 directory.to_owned(),
419 433 dispatch_missing(entry.state),
420 434 )))
421 435 .unwrap();
422 436 }
423 437 }
424 438 // Do we need to traverse it?
425 439 if !ignore_fn(&directory) || options.list_ignored {
426 440 traverse_dir(
427 441 files_sender,
428 442 matcher,
429 443 root_dir,
430 444 dmap,
431 445 directory,
432 446 &old_results,
433 447 ignore_fn,
434 448 dir_ignore_fn,
435 449 options,
450 traversed_sender,
436 451 )
437 452 .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
438 453 }
439 454 });
440 455 }
441 456
442 457 /// Decides whether the directory needs to be listed, and if so handles the
443 458 /// entries in a separate thread.
444 459 fn traverse_dir<'a>(
445 460 files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
446 461 matcher: &'a (impl Matcher + Sync),
447 462 root_dir: impl AsRef<Path> + Sync + Send + Copy,
448 463 dmap: &'a DirstateMap,
449 464 directory: impl AsRef<HgPath>,
450 465 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
451 466 ignore_fn: &IgnoreFnType,
452 467 dir_ignore_fn: &IgnoreFnType,
453 468 options: StatusOptions,
469 traversed_sender: crossbeam::Sender<HgPathBuf>,
454 470 ) -> IoResult<()> {
455 471 let directory = directory.as_ref();
456 472
473 if options.collect_traversed_dirs {
474 // The receiver always outlives the sender, so unwrap.
475 traversed_sender.send(directory.to_owned()).unwrap()
476 }
477
457 478 let visit_entries = match matcher.visit_children_set(directory) {
458 479 VisitChildrenSet::Empty => return Ok(()),
459 480 VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
460 481 VisitChildrenSet::Set(set) => Some(set),
461 482 };
462 483 let buf = hg_path_to_path_buf(directory)?;
463 484 let dir_path = root_dir.as_ref().join(buf);
464 485
465 486 let skip_dot_hg = !directory.as_bytes().is_empty();
466 487 let entries = match list_directory(dir_path, skip_dot_hg) {
467 488 Err(e) => match e.kind() {
468 489 ErrorKind::NotFound | ErrorKind::PermissionDenied => {
469 490 files_sender
470 491 .send(Ok((
471 492 directory.to_owned(),
472 493 Dispatch::Bad(BadMatch::OsError(
473 494 // Unwrapping here is OK because the error always
474 495 // is a real os error
475 496 e.raw_os_error().unwrap(),
476 497 )),
477 498 )))
478 499 .unwrap();
479 500 return Ok(());
480 501 }
481 502 _ => return Err(e),
482 503 },
483 504 Ok(entries) => entries,
484 505 };
485 506
486 507 rayon::scope(|scope| -> IoResult<()> {
487 508 for (filename, dir_entry) in entries {
488 509 if let Some(ref set) = visit_entries {
489 510 if !set.contains(filename.deref()) {
490 511 continue;
491 512 }
492 513 }
493 514 // TODO normalize
494 515 let filename = if directory.is_empty() {
495 516 filename.to_owned()
496 517 } else {
497 518 directory.join(&filename)
498 519 };
499 520
500 521 if !old_results.contains_key(filename.deref()) {
501 522 handle_traversed_entry(
502 523 scope,
503 524 files_sender,
504 525 matcher,
505 526 root_dir,
506 527 dmap,
507 528 old_results,
508 529 ignore_fn,
509 530 dir_ignore_fn,
510 531 options,
511 532 filename,
512 533 dir_entry,
534 traversed_sender.clone(),
513 535 )?;
514 536 }
515 537 }
516 538 Ok(())
517 539 })
518 540 }
519 541
520 542 /// Walk the working directory recursively to look for changes compared to the
521 543 /// current `DirstateMap`.
522 544 ///
523 545 /// This takes a mutable reference to the results to account for the `extend`
524 546 /// in timings
525 547 #[timed]
526 548 fn traverse<'a>(
527 549 matcher: &'a (impl Matcher + Sync),
528 550 root_dir: impl AsRef<Path> + Sync + Send + Copy,
529 551 dmap: &'a DirstateMap,
530 552 path: impl AsRef<HgPath>,
531 553 old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
532 554 ignore_fn: &IgnoreFnType,
533 555 dir_ignore_fn: &IgnoreFnType,
534 556 options: StatusOptions,
535 557 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
558 traversed_sender: crossbeam::Sender<HgPathBuf>,
536 559 ) -> IoResult<()> {
537 560 let root_dir = root_dir.as_ref();
538 561
539 562 // The traversal is done in parallel, so use a channel to gather entries.
540 563 // `crossbeam::Sender` is `Send`, while `mpsc::Sender` is not.
541 564 let (files_transmitter, files_receiver) = crossbeam::channel::unbounded();
542 565
543 566 traverse_dir(
544 567 &files_transmitter,
545 568 matcher,
546 569 root_dir,
547 570 &dmap,
548 571 path,
549 572 &old_results,
550 573 &ignore_fn,
551 574 &dir_ignore_fn,
552 575 options,
576 traversed_sender,
553 577 )?;
554 578
555 579 // Disconnect the channel so the receiver stops waiting
556 580 drop(files_transmitter);
557 581
558 582 // TODO don't collect. Find a way of replicating the behavior of
559 583 // `itertools::process_results`, but for `rayon::ParallelIterator`
560 584 let new_results: IoResult<Vec<(Cow<'a, HgPath>, Dispatch)>> =
561 585 files_receiver
562 586 .into_iter()
563 587 .map(|item| {
564 588 let (f, d) = item?;
565 589 Ok((Cow::Owned(f), d))
566 590 })
567 591 .collect();
568 592
569 593 results.par_extend(new_results?);
570 594
571 595 Ok(())
572 596 }
573 597
574 598 /// Stat all entries in the `DirstateMap` and mark them for dispatch.
575 599 fn stat_dmap_entries(
576 600 dmap: &DirstateMap,
577 601 root_dir: impl AsRef<Path> + Sync + Send,
578 602 options: StatusOptions,
579 603 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
580 604 dmap.par_iter().map(move |(filename, entry)| {
581 605 let filename: &HgPath = filename;
582 606 let filename_as_path = hg_path_to_path_buf(filename)?;
583 607 let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();
584 608
585 609 match meta {
586 610 Ok(ref m)
587 611 if !(m.file_type().is_file()
588 612 || m.file_type().is_symlink()) =>
589 613 {
590 614 Ok((filename, dispatch_missing(entry.state)))
591 615 }
592 616 Ok(m) => Ok((
593 617 filename,
594 618 dispatch_found(
595 619 filename,
596 620 *entry,
597 621 HgMetadata::from_metadata(m),
598 622 &dmap.copy_map,
599 623 options,
600 624 ),
601 625 )),
602 626 Err(ref e)
603 627 if e.kind() == ErrorKind::NotFound
604 628 || e.raw_os_error() == Some(20) =>
605 629 {
606 630 // Rust does not yet have an `ErrorKind` for
607 631 // `NotADirectory` (errno 20)
608 632 // It happens if the dirstate contains `foo/bar` and
609 633 // foo is not a directory
610 634 Ok((filename, dispatch_missing(entry.state)))
611 635 }
612 636 Err(e) => Err(e),
613 637 }
614 638 })
615 639 }
616 640
617 641 /// This takes a mutable reference to the results to account for the `extend`
618 642 /// in timings
619 643 #[timed]
620 644 fn extend_from_dmap<'a>(
621 645 dmap: &'a DirstateMap,
622 646 root_dir: impl AsRef<Path> + Sync + Send,
623 647 options: StatusOptions,
624 648 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
625 649 ) {
626 650 results.par_extend(
627 651 stat_dmap_entries(dmap, root_dir, options)
628 652 .flatten()
629 653 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch)),
630 654 );
631 655 }
632 656
633 657 #[derive(Debug)]
634 658 pub struct DirstateStatus<'a> {
635 659 pub modified: Vec<Cow<'a, HgPath>>,
636 660 pub added: Vec<Cow<'a, HgPath>>,
637 661 pub removed: Vec<Cow<'a, HgPath>>,
638 662 pub deleted: Vec<Cow<'a, HgPath>>,
639 663 pub clean: Vec<Cow<'a, HgPath>>,
640 664 pub ignored: Vec<Cow<'a, HgPath>>,
641 665 pub unknown: Vec<Cow<'a, HgPath>>,
642 666 pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
667 /// Only filled if `collect_traversed_dirs` is `true`
668 pub traversed: Vec<HgPathBuf>,
643 669 }
644 670
645 671 #[timed]
646 672 fn build_response<'a>(
647 673 results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
674 traversed: Vec<HgPathBuf>,
648 675 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
649 676 let mut lookup = vec![];
650 677 let mut modified = vec![];
651 678 let mut added = vec![];
652 679 let mut removed = vec![];
653 680 let mut deleted = vec![];
654 681 let mut clean = vec![];
655 682 let mut ignored = vec![];
656 683 let mut unknown = vec![];
657 684 let mut bad = vec![];
658 685
659 686 for (filename, dispatch) in results.into_iter() {
660 687 match dispatch {
661 688 Dispatch::Unknown => unknown.push(filename),
662 689 Dispatch::Unsure => lookup.push(filename),
663 690 Dispatch::Modified => modified.push(filename),
664 691 Dispatch::Added => added.push(filename),
665 692 Dispatch::Removed => removed.push(filename),
666 693 Dispatch::Deleted => deleted.push(filename),
667 694 Dispatch::Clean => clean.push(filename),
668 695 Dispatch::Ignored => ignored.push(filename),
669 696 Dispatch::None => {}
670 697 Dispatch::Bad(reason) => bad.push((filename, reason)),
671 698 Dispatch::Directory { .. } => {}
672 699 }
673 700 }
674 701
675 702 (
676 703 lookup,
677 704 DirstateStatus {
678 705 modified,
679 706 added,
680 707 removed,
681 708 deleted,
682 709 clean,
683 710 ignored,
684 711 unknown,
685 712 bad,
713 traversed,
686 714 },
687 715 )
688 716 }
689 717
690 718 #[derive(Debug)]
691 719 pub enum StatusError {
692 720 IO(std::io::Error),
693 721 Path(HgPathError),
694 722 Pattern(PatternError),
695 723 }
696 724
697 725 pub type StatusResult<T> = Result<T, StatusError>;
698 726
699 727 impl From<PatternError> for StatusError {
700 728 fn from(e: PatternError) -> Self {
701 729 StatusError::Pattern(e)
702 730 }
703 731 }
704 732 impl From<HgPathError> for StatusError {
705 733 fn from(e: HgPathError) -> Self {
706 734 StatusError::Path(e)
707 735 }
708 736 }
709 737 impl From<std::io::Error> for StatusError {
710 738 fn from(e: std::io::Error) -> Self {
711 739 StatusError::IO(e)
712 740 }
713 741 }
714 742
715 743 impl ToString for StatusError {
716 744 fn to_string(&self) -> String {
717 745 match self {
718 746 StatusError::IO(e) => e.to_string(),
719 747 StatusError::Path(e) => e.to_string(),
720 748 StatusError::Pattern(e) => e.to_string(),
721 749 }
722 750 }
723 751 }
724 752
725 753 /// This takes a mutable reference to the results to account for the `extend`
726 754 /// in timings
727 755 #[timed]
728 756 fn handle_unknowns<'a>(
729 757 dmap: &'a DirstateMap,
730 758 matcher: &(impl Matcher + Sync),
731 759 root_dir: impl AsRef<Path> + Sync + Send + Copy,
732 760 options: StatusOptions,
733 761 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
734 762 ) -> IoResult<()> {
735 763 let to_visit: Vec<(&HgPath, &DirstateEntry)> = if results.is_empty()
736 764 && matcher.matches_everything()
737 765 {
738 766 dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
739 767 } else {
740 768 // Only convert to a hashmap if needed.
741 769 let old_results: FastHashMap<_, _> = results.iter().cloned().collect();
742 770 dmap.iter()
743 771 .filter_map(move |(f, e)| {
744 772 if !old_results.contains_key(f.deref()) && matcher.matches(f) {
745 773 Some((f.deref(), e))
746 774 } else {
747 775 None
748 776 }
749 777 })
750 778 .collect()
751 779 };
752 780
753 781 // We walked all dirs under the roots that weren't ignored, and
754 782 // everything that matched was stat'ed and is already in results.
755 783 // The rest must thus be ignored or under a symlink.
756 784 let path_auditor = PathAuditor::new(root_dir);
757 785
758 786 // TODO don't collect. Find a way of replicating the behavior of
759 787 // `itertools::process_results`, but for `rayon::ParallelIterator`
760 788 let new_results: IoResult<Vec<_>> = to_visit
761 789 .into_par_iter()
762 790 .filter_map(|(filename, entry)| -> Option<IoResult<_>> {
763 791 // Report ignored items in the dmap as long as they are not
764 792 // under a symlink directory.
765 793 if path_auditor.check(filename) {
766 794 // TODO normalize for case-insensitive filesystems
767 795 let buf = match hg_path_to_path_buf(filename) {
768 796 Ok(x) => x,
769 797 Err(e) => return Some(Err(e.into())),
770 798 };
771 799 Some(Ok((
772 800 Cow::Borrowed(filename),
773 801 match root_dir.as_ref().join(&buf).symlink_metadata() {
774 802 // File was just ignored, no links, and exists
775 803 Ok(meta) => {
776 804 let metadata = HgMetadata::from_metadata(meta);
777 805 dispatch_found(
778 806 filename,
779 807 *entry,
780 808 metadata,
781 809 &dmap.copy_map,
782 810 options,
783 811 )
784 812 }
785 813 // File doesn't exist
786 814 Err(_) => dispatch_missing(entry.state),
787 815 },
788 816 )))
789 817 } else {
790 818 // It's either missing or under a symlink directory which
791 819 // we, in this case, report as missing.
792 820 Some(Ok((
793 821 Cow::Borrowed(filename),
794 822 dispatch_missing(entry.state),
795 823 )))
796 824 }
797 825 })
798 826 .collect();
799 827
800 828 results.par_extend(new_results?);
801 829
802 830 Ok(())
803 831 }
804 832
805 833 /// Get the status of files in the working directory.
806 834 ///
807 835 /// This is the current entry-point for `hg-core` and is realistically unusable
808 836 /// outside of a Python context because its arguments need to provide a lot of
809 837 /// information that will not be necessary in the future.
810 838 #[timed]
811 839 pub fn status<'a: 'c, 'b: 'c, 'c>(
812 840 dmap: &'a DirstateMap,
813 841 matcher: &'b (impl Matcher + Sync),
814 842 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c,
815 843 ignore_files: Vec<PathBuf>,
816 844 options: StatusOptions,
817 845 ) -> StatusResult<(
818 846 (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>),
819 847 Vec<PatternFileWarning>,
820 848 )> {
821 849 // Needs to outlive `dir_ignore_fn` since it's captured.
822 850 let mut ignore_fn: IgnoreFnType;
823 851
824 852 // Only involve real ignore mechanism if we're listing unknowns or ignored.
825 853 let (dir_ignore_fn, warnings): (IgnoreFnType, _) = if options.list_ignored
826 854 || options.list_unknown
827 855 {
828 856 let (ignore, warnings) = get_ignore_function(ignore_files, root_dir)?;
829 857
830 858 ignore_fn = ignore;
831 859 let dir_ignore_fn = Box::new(|dir: &_| {
832 860 // Is the path or one of its ancestors ignored?
833 861 if ignore_fn(dir) {
834 862 true
835 863 } else {
836 864 for p in find_dirs(dir) {
837 865 if ignore_fn(p) {
838 866 return true;
839 867 }
840 868 }
841 869 false
842 870 }
843 871 });
844 872 (dir_ignore_fn, warnings)
845 873 } else {
846 874 ignore_fn = Box::new(|&_| true);
847 875 (Box::new(|&_| true), vec![])
848 876 };
849 877
850 878 let files = matcher.file_set();
851 879
880 // `crossbeam::Sender` is `Send`, while `mpsc::Sender` is not.
881 let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded();
882
852 883 // Step 1: check the files explicitly mentioned by the user
853 let explicit = walk_explicit(files, &dmap, root_dir, options);
884 let explicit = walk_explicit(
885 files,
886 &dmap,
887 root_dir,
888 options,
889 traversed_sender.clone(),
890 );
854 891
855 892 // Collect results into a `Vec` because we do very few lookups in most
856 893 // cases.
857 894 let (work, mut results): (Vec<_>, Vec<_>) = explicit
858 895 .filter_map(Result::ok)
859 896 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch))
860 897 .partition(|(_, dispatch)| match dispatch {
861 898 Dispatch::Directory { .. } => true,
862 899 _ => false,
863 900 });
864 901
865 902 if !work.is_empty() {
866 903 // Hashmaps are quite a bit slower to build than vecs, so only build it
867 904 // if needed.
868 905 let old_results = results.iter().cloned().collect();
869 906
870 907 // Step 2: recursively check the working directory for changes if
871 908 // needed
872 909 for (dir, dispatch) in work {
873 910 match dispatch {
874 911 Dispatch::Directory { was_file } => {
875 912 if was_file {
876 913 results.push((dir.to_owned(), Dispatch::Removed));
877 914 }
878 915 if options.list_ignored
879 916 || options.list_unknown && !dir_ignore_fn(&dir)
880 917 {
881 918 traverse(
882 919 matcher,
883 920 root_dir,
884 921 &dmap,
885 922 &dir,
886 923 &old_results,
887 924 &ignore_fn,
888 925 &dir_ignore_fn,
889 926 options,
890 927 &mut results,
928 traversed_sender.clone(),
891 929 )?;
892 930 }
893 931 }
894 932 _ => unreachable!("There can only be directories in `work`"),
895 933 }
896 934 }
897 935 }
898 936
899 937 if !matcher.is_exact() {
900 938 // Step 3: Check the remaining files from the dmap.
901 939 // If a dmap file is not in results yet, it was either
902 940 // a) not matched b) ignored, c) missing, or d) under a
903 941 // symlink directory.
904 942
905 943 if options.list_unknown {
906 944 handle_unknowns(dmap, matcher, root_dir, options, &mut results)?;
907 945 } else {
908 946 // We may not have walked the full directory tree above, so stat
909 947 // and check everything we missed.
910 948 extend_from_dmap(&dmap, root_dir, options, &mut results);
911 949 }
912 950 }
913 951
914 Ok((build_response(results), warnings))
952 // Close the channel
953 drop(traversed_sender);
954 let traversed_dirs = traversed_recv.into_iter().collect();
955
956 Ok((build_response(results, traversed_dirs), warnings))
915 957 }
General Comments 0
You need to be logged in to leave comments. Login now