##// END OF EJS Templates
rust-status: query fs traversal metadata lazily...
Raphaël Gomès -
r50459:da48f170 default
parent child Browse files
Show More
@@ -14,7 +14,6 b' use crate::utils::files::get_path_from_b'
14 14 use crate::utils::hg_path::HgPath;
15 15 use crate::BadMatch;
16 16 use crate::DirstateStatus;
17 use crate::HgPathBuf;
18 17 use crate::HgPathCow;
19 18 use crate::PatternFileWarning;
20 19 use crate::StatusError;
@@ -24,6 +23,8 b' use once_cell::sync::OnceCell;'
24 23 use rayon::prelude::*;
25 24 use sha1::{Digest, Sha1};
26 25 use std::borrow::Cow;
26 use std::convert::TryFrom;
27 use std::convert::TryInto;
27 28 use std::io;
28 29 use std::path::Path;
29 30 use std::path::PathBuf;
@@ -129,7 +130,6 b" pub fn status<'dirstate>("
129 130 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
130 131 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
131 132 let root_cached_mtime = None;
132 let root_dir_metadata = None;
133 133 // If the path we have for the repository root is a symlink, do follow it.
134 134 // (As opposed to symlinks within the working directory which are not
135 135 // followed, using `std::fs::symlink_metadata`.)
@@ -137,8 +137,12 b" pub fn status<'dirstate>("
137 137 &has_ignored_ancestor,
138 138 dmap.root.as_ref(),
139 139 hg_path,
140 &root_dir,
141 root_dir_metadata,
140 &DirEntry {
141 hg_path: Cow::Borrowed(HgPath::new(b"")),
142 fs_path: Cow::Borrowed(&root_dir),
143 symlink_metadata: None,
144 file_type: FakeFileType::Directory,
145 },
142 146 root_cached_mtime,
143 147 is_at_repo_root,
144 148 )?;
@@ -319,7 +323,7 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
319 323 /// need to call `read_dir`.
320 324 fn can_skip_fs_readdir(
321 325 &self,
322 directory_metadata: Option<&std::fs::Metadata>,
326 directory_entry: &DirEntry,
323 327 cached_directory_mtime: Option<TruncatedTimestamp>,
324 328 ) -> bool {
325 329 if !self.options.list_unknown && !self.options.list_ignored {
@@ -335,9 +339,9 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
335 339 // The dirstate contains a cached mtime for this directory, set
336 340 // by a previous run of the `status` algorithm which found this
337 341 // directory eligible for `read_dir` caching.
338 if let Some(meta) = directory_metadata {
342 if let Ok(meta) = directory_entry.symlink_metadata() {
339 343 if cached_mtime
340 .likely_equal_to_mtime_of(meta)
344 .likely_equal_to_mtime_of(&meta)
341 345 .unwrap_or(false)
342 346 {
343 347 // The mtime of that directory has not changed
@@ -358,26 +362,40 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
358 362 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
359 363 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
360 364 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
361 directory_fs_path: &Path,
362 directory_metadata: Option<&std::fs::Metadata>,
365 directory_entry: &DirEntry,
363 366 cached_directory_mtime: Option<TruncatedTimestamp>,
364 367 is_at_repo_root: bool,
365 368 ) -> Result<bool, DirstateV2ParseError> {
366 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
367 {
369 if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
368 370 dirstate_nodes
369 371 .par_iter()
370 372 .map(|dirstate_node| {
371 let fs_path = directory_fs_path.join(get_path_from_bytes(
373 let fs_path = &directory_entry.fs_path;
374 let fs_path = fs_path.join(get_path_from_bytes(
372 375 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
373 376 ));
374 377 match std::fs::symlink_metadata(&fs_path) {
375 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
376 &fs_path,
377 &fs_metadata,
378 Ok(fs_metadata) => {
379 let file_type =
380 match fs_metadata.file_type().try_into() {
381 Ok(file_type) => file_type,
382 Err(_) => return Ok(()),
383 };
384 let entry = DirEntry {
385 hg_path: Cow::Borrowed(
386 dirstate_node
387 .full_path(&self.dmap.on_disk)?,
388 ),
389 fs_path: Cow::Borrowed(&fs_path),
390 symlink_metadata: Some(fs_metadata),
391 file_type,
392 };
393 self.traverse_fs_and_dirstate(
394 &entry,
378 395 dirstate_node,
379 396 has_ignored_ancestor,
380 ),
397 )
398 }
381 399 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
382 400 self.traverse_dirstate_only(dirstate_node)
383 401 }
@@ -398,7 +416,7 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
398 416
399 417 let mut fs_entries = if let Ok(entries) = self.read_dir(
400 418 directory_hg_path,
401 directory_fs_path,
419 &directory_entry.fs_path,
402 420 is_at_repo_root,
403 421 ) {
404 422 entries
@@ -440,8 +458,7 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
440 458 match pair {
441 459 Both(dirstate_node, fs_entry) => {
442 460 self.traverse_fs_and_dirstate(
443 &fs_entry.fs_path,
444 &fs_entry.metadata,
461 &fs_entry,
445 462 dirstate_node,
446 463 has_ignored_ancestor,
447 464 )?;
@@ -466,22 +483,20 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
466 483
467 484 fn traverse_fs_and_dirstate<'ancestor>(
468 485 &self,
469 fs_path: &Path,
470 fs_metadata: &std::fs::Metadata,
486 fs_entry: &DirEntry,
471 487 dirstate_node: NodeRef<'tree, 'on_disk>,
472 488 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
473 489 ) -> Result<(), DirstateV2ParseError> {
474 490 self.check_for_outdated_directory_cache(&dirstate_node)?;
475 491 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
476 let file_type = fs_metadata.file_type();
477 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
492 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
478 493 if !file_or_symlink {
479 494 // If we previously had a file here, it was removed (with
480 495 // `hg rm` or similar) or deleted before it could be
481 496 // replaced by a directory or something else.
482 497 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
483 498 }
484 if file_type.is_dir() {
499 if fs_entry.is_dir() {
485 500 if self.options.collect_traversed_dirs {
486 501 self.outcome
487 502 .lock()
@@ -499,14 +514,13 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
499 514 &is_ignored,
500 515 dirstate_node.children(self.dmap.on_disk)?,
501 516 hg_path,
502 fs_path,
503 Some(fs_metadata),
517 fs_entry,
504 518 dirstate_node.cached_directory_mtime()?,
505 519 is_at_repo_root,
506 520 )?;
507 521 self.maybe_save_directory_mtime(
508 522 children_all_have_dirstate_node_or_are_ignored,
509 fs_metadata,
523 fs_entry,
510 524 dirstate_node,
511 525 )?
512 526 } else {
@@ -527,7 +541,7 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
527 541 } else if entry.modified() {
528 542 self.push_outcome(Outcome::Modified, &dirstate_node)?;
529 543 } else {
530 self.handle_normal_file(&dirstate_node, fs_metadata)?;
544 self.handle_normal_file(&dirstate_node, fs_entry)?;
531 545 }
532 546 } else {
533 547 // `node.entry.is_none()` indicates a "directory"
@@ -550,7 +564,7 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
550 564 fn maybe_save_directory_mtime(
551 565 &self,
552 566 children_all_have_dirstate_node_or_are_ignored: bool,
553 directory_metadata: &std::fs::Metadata,
567 directory_entry: &DirEntry,
554 568 dirstate_node: NodeRef<'tree, 'on_disk>,
555 569 ) -> Result<(), DirstateV2ParseError> {
556 570 if !children_all_have_dirstate_node_or_are_ignored {
@@ -576,11 +590,13 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
576 590 // resolution based on the filesystem (for example ext3
577 591 // only stores integer seconds), kernel (see
578 592 // https://stackoverflow.com/a/14393315/1162888), etc.
593 let metadata = match directory_entry.symlink_metadata() {
594 Ok(meta) => meta,
595 Err(_) => return Ok(()),
596 };
579 597 let directory_mtime = if let Ok(option) =
580 TruncatedTimestamp::for_reliable_mtime_of(
581 directory_metadata,
582 status_start,
583 ) {
598 TruncatedTimestamp::for_reliable_mtime_of(&metadata, status_start)
599 {
584 600 if let Some(directory_mtime) = option {
585 601 directory_mtime
586 602 } else {
@@ -641,18 +657,23 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
641 657 fn handle_normal_file(
642 658 &self,
643 659 dirstate_node: &NodeRef<'tree, 'on_disk>,
644 fs_metadata: &std::fs::Metadata,
660 fs_entry: &DirEntry,
645 661 ) -> Result<(), DirstateV2ParseError> {
646 662 // Keep the low 31 bits
647 663 fn truncate_u64(value: u64) -> i32 {
648 664 (value & 0x7FFF_FFFF) as i32
649 665 }
650 666
667 let fs_metadata = match fs_entry.symlink_metadata() {
668 Ok(meta) => meta,
669 Err(_) => return Ok(()),
670 };
671
651 672 let entry = dirstate_node
652 673 .entry()?
653 674 .expect("handle_normal_file called with entry-less node");
654 675 let mode_changed =
655 || self.options.check_exec && entry.mode_changed(fs_metadata);
676 || self.options.check_exec && entry.mode_changed(&fs_metadata);
656 677 let size = entry.size();
657 678 let size_changed = size != truncate_u64(fs_metadata.len());
658 679 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
@@ -667,7 +688,7 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
667 688 } else {
668 689 let mtime_looks_clean;
669 690 if let Some(dirstate_mtime) = entry.truncated_mtime() {
670 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
691 let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
671 692 .expect("OS/libc does not support mtime?");
672 693 // There might be a change in the future if for example the
673 694 // internal clock becomes off while the process runs, but this is a
@@ -738,9 +759,8 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
738 759 fs_entry: &DirEntry,
739 760 ) -> bool {
740 761 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
741 let file_type = fs_entry.metadata.file_type();
742 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
743 if file_type.is_dir() {
762 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
763 if fs_entry.is_dir() {
744 764 let is_ignored =
745 765 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
746 766 let traverse_children = if is_ignored {
@@ -753,11 +773,9 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
753 773 };
754 774 if traverse_children {
755 775 let is_at_repo_root = false;
756 if let Ok(children_fs_entries) = self.read_dir(
757 &hg_path,
758 &fs_entry.fs_path,
759 is_at_repo_root,
760 ) {
776 if let Ok(children_fs_entries) =
777 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
778 {
761 779 children_fs_entries.par_iter().for_each(|child_fs_entry| {
762 780 self.traverse_fs_only(
763 781 is_ignored,
@@ -820,17 +838,46 b" impl<'a, 'tree, 'on_disk> StatusCommon<'"
820 838 }
821 839 }
822 840
823 struct DirEntry {
824 /// Path as stored in the dirstate
825 hg_path: HgPathBuf,
826 /// Filesystem path
827 fs_path: PathBuf,
828 metadata: std::fs::Metadata,
841 /// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
842 /// care about.
843 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
844 enum FakeFileType {
845 File,
846 Directory,
847 Symlink,
829 848 }
830 849
831 impl DirEntry {
832 /// Returns **unsorted** entries in the given directory, with name and
833 /// metadata.
850 impl TryFrom<std::fs::FileType> for FakeFileType {
851 type Error = ();
852
853 fn try_from(f: std::fs::FileType) -> Result<Self, Self::Error> {
854 if f.is_dir() {
855 Ok(Self::Directory)
856 } else if f.is_file() {
857 Ok(Self::File)
858 } else if f.is_symlink() {
859 Ok(Self::Symlink)
860 } else {
861 // Things like FIFO etc.
862 Err(())
863 }
864 }
865 }
866
867 struct DirEntry<'a> {
868 /// Path as stored in the dirstate, or just the filename for optimization.
869 hg_path: HgPathCow<'a>,
870 /// Filesystem path
871 fs_path: Cow<'a, Path>,
872 /// Metadata if already fetched; otherwise queried on demand (not cached)
873 symlink_metadata: Option<std::fs::Metadata>,
874 /// Already computed for ergonomics.
875 file_type: FakeFileType,
876 }
877
878 impl<'a> DirEntry<'a> {
879 /// Returns **unsorted** entries in the given directory, with name,
880 /// metadata and file type.
834 881 ///
835 882 /// If a `.hg` sub-directory is encountered:
836 883 ///
@@ -844,7 +891,7 b' impl DirEntry {'
844 891 let mut results = Vec::new();
845 892 for entry in read_dir_path.read_dir()? {
846 893 let entry = entry?;
847 let metadata = match entry.metadata() {
894 let file_type = match entry.file_type() {
848 895 Ok(v) => v,
849 896 Err(e) => {
850 897 // race with file deletion?
@@ -861,7 +908,7 b' impl DirEntry {'
861 908 if is_at_repo_root {
862 909 // Skip the repo’s own .hg (might be a symlink)
863 910 continue;
864 } else if metadata.is_dir() {
911 } else if file_type.is_dir() {
865 912 // A .hg sub-directory at another location means a subrepo,
866 913 // skip it entirely.
867 914 return Ok(Vec::new());
@@ -872,15 +919,40 b' impl DirEntry {'
872 919 } else {
873 920 entry.path()
874 921 };
875 let base_name = get_bytes_from_os_string(file_name).into();
922 let filename =
923 Cow::Owned(get_bytes_from_os_string(file_name).into());
924 let file_type = match FakeFileType::try_from(file_type) {
925 Ok(file_type) => file_type,
926 Err(_) => continue,
927 };
876 928 results.push(DirEntry {
877 hg_path: base_name,
878 fs_path: full_path,
879 metadata,
929 hg_path: filename,
930 fs_path: Cow::Owned(full_path.to_path_buf()),
931 symlink_metadata: None,
932 file_type,
880 933 })
881 934 }
882 935 Ok(results)
883 936 }
937
938 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
939 match &self.symlink_metadata {
940 Some(meta) => Ok(meta.clone()),
941 None => std::fs::symlink_metadata(&self.fs_path),
942 }
943 }
944
945 fn is_dir(&self) -> bool {
946 self.file_type == FakeFileType::Directory
947 }
948
949 fn is_file(&self) -> bool {
950 self.file_type == FakeFileType::File
951 }
952
953 fn is_symlink(&self) -> bool {
954 self.file_type == FakeFileType::Symlink
955 }
884 956 }
885 957
886 958 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
General Comments 0
You need to be logged in to leave comments. Login now