##// END OF EJS Templates
run-tests: call the command using shell=True to please Windows...
run-tests: call the command using shell=True to please Windows Windows is unhappy about the lack of shell=True, probably because the "self._realhg" is a script instead of a Win32 executable.

File last commit:

r53200:22d24f6d default
r53436:4f2bbad8 default
Show More
status.rs
1172 lines | 43.2 KiB | application/rls-services+xml | RustLexer
// status.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Rust implementation of dirstate.status (dirstate.py).
//! It is currently missing a lot of functionality compared to the Python one
//! and will only be triggered in narrow cases.
use crate::dirstate::dirstate_map::BorrowedPath;
use crate::dirstate::dirstate_map::ChildNodesRef;
use crate::dirstate::dirstate_map::DirstateMap;
use crate::dirstate::dirstate_map::DirstateVersion;
use crate::dirstate::dirstate_map::NodeRef;
use crate::dirstate::entry::TruncatedTimestamp;
use crate::dirstate::on_disk::DirstateV2ParseError;
use crate::filepatterns::PatternError;
use crate::filepatterns::PatternFileWarning;
use crate::matchers::get_ignore_function;
use crate::matchers::{Matcher, VisitChildrenSet};
use crate::utils::files::filesystem_now;
use crate::utils::files::get_bytes_from_os_string;
use crate::utils::files::get_bytes_from_path;
use crate::utils::files::get_path_from_bytes;
use crate::utils::hg_path::hg_path_to_path_buf;
use crate::utils::hg_path::HgPath;
use crate::utils::hg_path::HgPathError;
use once_cell::sync::OnceCell;
use rayon::prelude::*;
use sha1::{Digest, Sha1};
use std::io;
use std::os::unix::prelude::FileTypeExt;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Mutex;
use std::{borrow::Cow, fmt};
/// The wrong kind of filesystem entry reported through a
/// `BadMatch::BadType`.
///
/// Note: many of these kinds do not exist on every platform.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BadType {
    CharacterDevice,
    BlockDevice,
    FIFO,
    Socket,
    Directory,
    Unknown,
}

impl fmt::Display for BadType {
    /// Writes the lower-case, human-readable name of the file kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match *self {
            Self::CharacterDevice => "character device",
            Self::BlockDevice => "block device",
            Self::FIFO => "fifo",
            Self::Socket => "socket",
            Self::Directory => "directory",
            Self::Unknown => "unknown",
        };
        f.write_str(label)
    }
}
/// Was explicitly matched but cannot be found/accessed
#[derive(Debug, Copy, Clone)]
pub enum BadMatch {
/// An I/O error occurred while accessing the path; holds the raw OS error
/// number taken from the `std::io::Error`.
OsError(i32),
/// The filesystem entry is of the wrong type (see `BadType`).
BadType(BadType),
}
/// Boxed predicate telling whether a given path is ignored.
///
/// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait + 'static>`, so add
/// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
pub type IgnoreFnType<'a> =
Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
/// An `HgPath` that is either borrowed or owned.
///
/// We have a good mix of owned (from directory traversal) and borrowed (from
/// the dirstate/explicit) paths; this comes up a lot.
pub type HgPathCow<'a> = Cow<'a, HgPath>;
/// Options controlling what `status()` looks at and what it reports.
#[derive(Debug, Copy, Clone)]
pub struct StatusOptions {
/// Whether we are on a filesystem with UNIX-like exec flags
pub check_exec: bool,
/// Whether to populate `DirstateStatus::clean`
pub list_clean: bool,
/// Whether to populate `DirstateStatus::unknown`
pub list_unknown: bool,
/// Whether to populate `DirstateStatus::ignored`
pub list_ignored: bool,
/// Whether to populate `StatusPath::copy_source`
pub list_copies: bool,
/// Whether to collect traversed dirs for applying a callback later.
/// Used by `hg purge` for example.
pub collect_traversed_dirs: bool,
}
/// Outcome of a `status()` run: paths grouped by state, plus bookkeeping
/// about the run itself.
#[derive(Default)]
pub struct DirstateStatus<'a> {
/// The current time at the start of the `status()` algorithm, as measured
/// and possibly truncated by the filesystem.
pub filesystem_time_at_status_start: Option<TruncatedTimestamp>,
/// Tracked files whose contents have changed since the parent revision
pub modified: Vec<StatusPath<'a>>,
/// Newly-tracked files that were not present in the parent
pub added: Vec<StatusPath<'a>>,
/// Previously-tracked files that have been (re)moved with an hg command
pub removed: Vec<StatusPath<'a>>,
/// (Still) tracked files that are missing, (re)moved with a non-hg
/// command
pub deleted: Vec<StatusPath<'a>>,
/// Tracked files that are up to date with the parent.
/// Only populated if `StatusOptions::list_clean` is true.
pub clean: Vec<StatusPath<'a>>,
/// Files in the working directory that are ignored with `.hgignore`.
/// Only populated if `StatusOptions::list_ignored` is true.
pub ignored: Vec<StatusPath<'a>>,
/// Files in the working directory that are neither tracked nor ignored.
/// Only populated if `StatusOptions::list_unknown` is true.
pub unknown: Vec<StatusPath<'a>>,
/// Was explicitly matched but cannot be found/accessed
pub bad: Vec<(HgPathCow<'a>, BadMatch)>,
/// Either clean or modified, but we can’t tell from filesystem metadata
/// alone. The file contents need to be read and compared with that in
/// the parent.
pub unsure: Vec<StatusPath<'a>>,
/// Only filled if `collect_traversed_dirs` is `true`
pub traversed: Vec<HgPathCow<'a>>,
/// Whether `status()` made changes to the `DirstateMap` that should be
/// written back to disk
pub dirty: bool,
}
/// A path reported by `status()`, optionally with the path it was copied
/// from.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct StatusPath<'a> {
/// The reported path itself
pub path: HgPathCow<'a>,
/// Copy source of `path`, if any. Only looked up when
/// `StatusOptions::list_copies` is true, `None` otherwise.
pub copy_source: Option<HgPathCow<'a>>,
}
/// Errors that make `status()` fail as a whole (as opposed to per-path
/// problems, which are reported in `DirstateStatus::bad`).
#[derive(Debug, derive_more::From)]
pub enum StatusError {
/// An invalid path that cannot be represented in Mercurial was found
Path(HgPathError),
/// An invalid "ignore" pattern was found
Pattern(PatternError),
/// Corrupted dirstate
DirstateV2ParseError(DirstateV2ParseError),
}
impl fmt::Display for StatusError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
StatusError::Path(error) => error.fmt(f),
StatusError::Pattern(error) => error.fmt(f),
StatusError::DirstateV2ParseError(_) => {
f.write_str("dirstate-v2 parse error")
}
}
}
}
/// Returns the status of the working directory compared to its parent
/// changeset.
///
/// This algorithm is based on traversing the filesystem tree (`fs` in function
/// and variable names) and dirstate tree at the same time. The core of this
/// traversal is the recursive `traverse_fs_directory_and_dirstate` function
/// and its use of `itertools::merge_join_by`. When reaching a path that only
/// exists in one of the two trees, depending on information requested by
/// `options` we may need to traverse the remaining subtree.
///
/// Parameters:
///
/// * `dmap`: the dirstate to compare the filesystem against. Its
///   `ignore_patterns_hash` (dirstate-v2 only) and its cached directory
///   mtimes may be updated by this call.
/// * `matcher`: restricts which paths are visited and reported.
/// * `root_dir`: filesystem path of the repository root.
/// * `ignore_files`: ignore pattern files handed to `get_ignore_function`.
///   They are only read when unknown or ignored files are listed.
/// * `options`: see `StatusOptions`.
///
/// On success, returns the `DirstateStatus` together with the warnings
/// emitted while reading the ignore files. `DirstateStatus::dirty` tells the
/// caller whether `dmap` should be written back to disk.
#[logging_timer::time("trace")]
pub fn status<'dirstate>(
dmap: &'dirstate mut DirstateMap,
matcher: &(dyn Matcher + Sync),
root_dir: PathBuf,
ignore_files: Vec<PathBuf>,
options: StatusOptions,
) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
{
// Also cap for a Python caller of this function, but don't complain if
// the global threadpool has already been set since this code path is also
// being used by `rhg`, which calls this early.
let _ = crate::utils::cap_default_rayon_threads();
// `patterns_changed` is `None` whenever we cannot tell: either the ignore
// files were not read at all, or the dirstate is v1 and no hash is kept.
let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
if options.list_ignored || options.list_unknown {
let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
DirstateVersion::V1 => {
let (ignore_fn, warnings) = get_ignore_function(
ignore_files,
&root_dir,
&mut |_source, _pattern_bytes| {},
)?;
(ignore_fn, warnings, None)
}
DirstateVersion::V2 => {
// Hash every (source file, patterns) pair so that a later run
// can detect that the ignore patterns have changed.
let mut hasher = Sha1::new();
let (ignore_fn, warnings) = get_ignore_function(
ignore_files,
&root_dir,
&mut |source, pattern_bytes| {
// If inside the repo, use the relative version to
// make it deterministic inside tests.
// The performance hit should be negligible.
let source = source
.strip_prefix(&root_dir)
.unwrap_or(source);
let source = get_bytes_from_path(source);
let mut subhasher = Sha1::new();
subhasher.update(pattern_bytes);
let patterns_hash = subhasher.finalize();
hasher.update(source);
hasher.update(b" ");
hasher.update(patterns_hash);
hasher.update(b"\n");
},
)?;
let new_hash = *hasher.finalize().as_ref();
let changed = new_hash != dmap.ignore_patterns_hash;
dmap.ignore_patterns_hash = new_hash;
(ignore_fn, warnings, Some(changed))
}
};
(ignore_fn, warnings, changed)
} else {
// The ignore files are not read: use a predicate that answers `true`
// for every path.
(Box::new(|&_| true), vec![], None)
};
let filesystem_time_at_status_start =
filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
// If the repository is under the current directory, prefer using a
// relative path, so the kernel needs to traverse fewer directories in
// every call to `read_dir` or `symlink_metadata`.
// This is effective in the common case where the current directory is the
// repository root.
// TODO: Better yet would be to use libc functions like `openat` and
// `fstatat` to remove such repeated traversals entirely, but the standard
// library does not provide APIs based on those.
// Maybe with a crate like https://crates.io/crates/openat instead?
let root_dir = if let Some(relative) = std::env::current_dir()
.ok()
.and_then(|cwd| root_dir.strip_prefix(cwd).ok())
{
relative
} else {
&root_dir
};
let outcome = DirstateStatus {
filesystem_time_at_status_start,
..Default::default()
};
let common = StatusCommon {
dmap,
options,
matcher,
ignore_fn,
outcome: Mutex::new(outcome),
ignore_patterns_have_changed: patterns_changed,
new_cacheable_directories: Default::default(),
outdated_cached_directories: Default::default(),
filesystem_time_at_status_start,
};
let is_at_repo_root = true;
let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
let root_cached_mtime = None;
// If the path we have for the repository root is a symlink, do follow it.
// (As opposed to symlinks within the working directory which are not
// followed, using `std::fs::symlink_metadata`.)
common.traverse_fs_directory_and_dirstate(
&has_ignored_ancestor,
dmap.root.as_ref(),
hg_path,
&DirEntry {
hg_path: Cow::Borrowed(HgPath::new(b"")),
fs_path: Cow::Borrowed(root_dir),
symlink_metadata: None,
file_type: FakeFileType::Directory,
},
root_cached_mtime,
is_at_repo_root,
)?;
// Files named explicitly by the matcher that have no dirstate node and
// cannot be stat-ed are reported in `bad` with the OS error.
if let Some(file_set) = common.matcher.file_set() {
for file in file_set {
if !file.is_empty() && !dmap.has_node(file)? {
let path = hg_path_to_path_buf(file)?;
if let io::Result::Err(error) =
root_dir.join(path).symlink_metadata()
{
common.io_error(error, file)
}
}
}
}
let mut outcome = common.outcome.into_inner().unwrap();
let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
let outdated = common.outdated_cached_directories.into_inner().unwrap();
// The dirstate needs to be written back if the ignore patterns hash
// changed, if cached mtimes must be dropped, or (dirstate-v2 only) if
// there are new mtimes to cache.
outcome.dirty = common.ignore_patterns_have_changed == Some(true)
|| !outdated.is_empty()
|| (!new_cacheable.is_empty()
&& dmap.dirstate_version == DirstateVersion::V2);
// Remove outdated mtimes before adding new mtimes, in case a given
// directory is both
for path in &outdated {
dmap.clear_cached_mtime(path)?;
}
for (path, mtime) in &new_cacheable {
dmap.set_cached_mtime(path, *mtime)?;
}
Ok((outcome, warnings))
}
/// Bag of random things needed by various parts of the algorithm. Reduces the
/// number of parameters passed to functions.
///
/// The fields wrapped in a `Mutex` are the ones written to during the
/// traversal, which takes `&self` and runs partly in parallel.
struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
/// The dirstate being compared against the filesystem
dmap: &'tree DirstateMap<'on_disk>,
/// What the caller asked to be listed
options: StatusOptions,
/// Restricts which paths are visited and reported
matcher: &'a (dyn Matcher + Sync),
/// Tells whether a path is ignored
ignore_fn: IgnoreFnType<'a>,
/// The result being accumulated
outcome: Mutex<DirstateStatus<'on_disk>>,
/// New timestamps of directories to be used for caching their readdirs
new_cacheable_directories:
Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
/// Used to invalidate the readdir cache of directories
outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
/// Whether ignore files like `.hgignore` have changed since the previous
/// time a `status()` call wrote their hash to the dirstate. `None` means
/// we don’t know as this run doesn’t list either ignored or unknown files
/// and therefore isn’t reading `.hgignore`.
ignore_patterns_have_changed: Option<bool>,
/// The current time at the start of the `status()` algorithm, as measured
/// and possibly truncated by the filesystem.
filesystem_time_at_status_start: Option<TruncatedTimestamp>,
}
/// Selects which list of `DirstateStatus` a path gets pushed to (see
/// `StatusCommon::push_outcome_common`). Each variant maps to the field of
/// the same name.
enum Outcome {
Modified,
Added,
Removed,
Deleted,
Clean,
Ignored,
Unknown,
Unsure,
}
/// Lazy computation of whether a given path has a hgignored
/// ancestor.
///
/// Instances form a chain through `parent`, one link per directory level
/// being traversed. Nothing is computed until `force` is called.
struct HasIgnoredAncestor<'a> {
/// `path` and `parent` constitute the inputs to the computation,
/// `cache` stores the outcome.
path: &'a HgPath,
/// Link for the parent directory; `None` at the start of the chain.
parent: Option<&'a HasIgnoredAncestor<'a>>,
/// Result of `force`, filled in on first use.
cache: OnceCell<bool>,
}
impl<'a> HasIgnoredAncestor<'a> {
fn create(
parent: Option<&'a HasIgnoredAncestor<'a>>,
path: &'a HgPath,
) -> HasIgnoredAncestor<'a> {
Self {
path,
parent,
cache: OnceCell::new(),
}
}
fn force(&self, ignore_fn: &IgnoreFnType<'_>) -> bool {
match self.parent {
None => false,
Some(parent) => {
*(self.cache.get_or_init(|| {
parent.force(ignore_fn) || ignore_fn(self.path)
}))
}
}
}
}
impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
/// Records the path of `dirstate_node` in the list selected by `which`.
///
/// The copy source of the node is only looked up, and attached to the
/// recorded path, when `StatusOptions::list_copies` is set.
fn push_outcome(
    &self,
    which: Outcome,
    dirstate_node: &NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
    let path = dirstate_node
        .full_path_borrowed(self.dmap.on_disk)?
        .detach_from_tree();
    let copy_source = if !self.options.list_copies {
        None
    } else {
        let borrowed_source =
            dirstate_node.copy_source_borrowed(self.dmap.on_disk)?;
        borrowed_source.map(|source| source.detach_from_tree())
    };
    self.push_outcome_common(which, path, copy_source);
    Ok(())
}
/// Records `path` in the list selected by `which`, with no copy source.
fn push_outcome_without_copy_source(
    &self,
    which: Outcome,
    path: &BorrowedPath<'_, 'on_disk>,
) {
    let detached = path.detach_from_tree();
    self.push_outcome_common(which, detached, None)
}
/// Appends a `StatusPath` built from `path` and `copy_source` to the field
/// of the shared `DirstateStatus` that corresponds to `which`.
fn push_outcome_common(
    &self,
    which: Outcome,
    path: HgPathCow<'on_disk>,
    copy_source: Option<HgPathCow<'on_disk>>,
) {
    let status_path = StatusPath { path, copy_source };
    let mut status = self.outcome.lock().unwrap();
    let bucket = match which {
        Outcome::Modified => &mut status.modified,
        Outcome::Added => &mut status.added,
        Outcome::Removed => &mut status.removed,
        Outcome::Deleted => &mut status.deleted,
        Outcome::Clean => &mut status.clean,
        Outcome::Ignored => &mut status.ignored,
        Outcome::Unknown => &mut status.unknown,
        Outcome::Unsure => &mut status.unsure,
    };
    bucket.push(status_path);
}
/// Lists the entries of the directory at `fs_path`.
///
/// On failure the I/O error is recorded against `hg_path` through
/// `io_error`, and `Err(())` is returned.
fn read_dir(
    &self,
    hg_path: &HgPath,
    fs_path: &Path,
    is_at_repo_root: bool,
) -> Result<Vec<DirEntry>, ()> {
    match DirEntry::read_dir(fs_path, is_at_repo_root) {
        Ok(entries) => Ok(entries),
        Err(error) => {
            self.io_error(error, hg_path);
            Err(())
        }
    }
}
fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
let errno = error.raw_os_error().expect("expected real OS error");
self.outcome
.lock()
.unwrap()
.bad
.push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
}
/// If the ignore patterns are known to have changed and `dirstate_node`
/// carries a cached directory mtime, queues the node's path in
/// `outdated_cached_directories` and returns `true`. Otherwise returns
/// `false` and queues nothing.
fn check_for_outdated_directory_cache(
    &self,
    dirstate_node: &NodeRef<'tree, 'on_disk>,
) -> Result<bool, DirstateV2ParseError> {
    if self.ignore_patterns_have_changed != Some(true) {
        return Ok(false);
    }
    if dirstate_node.cached_directory_mtime()?.is_none() {
        return Ok(false);
    }
    let path = dirstate_node
        .full_path_borrowed(self.dmap.on_disk)?
        .detach_from_tree();
    self.outdated_cached_directories.lock().unwrap().push(path);
    Ok(true)
}
/// If this returns true, we can get accurate results by only using
/// `symlink_metadata` for child nodes that exist in the dirstate and don’t
/// need to call `read_dir`.
fn can_skip_fs_readdir(
&self,
directory_entry: &DirEntry,
cached_directory_mtime: Option<TruncatedTimestamp>,
) -> bool {
if !self.options.list_unknown && !self.options.list_ignored {
// All states that we care about listing have corresponding
// dirstate entries.
// This happens for example with `hg status -mard`.
return true;
}
if !self.options.list_ignored
&& self.ignore_patterns_have_changed == Some(false)
{
if let Some(cached_mtime) = cached_directory_mtime {
// The dirstate contains a cached mtime for this directory, set
// by a previous run of the `status` algorithm which found this
// directory eligible for `read_dir` caching.
if let Ok(meta) = directory_entry.symlink_metadata() {
if cached_mtime
.likely_equal_to_mtime_of(&meta)
.unwrap_or(false)
{
// The mtime of that directory has not changed
// since then, which means that the results of
// `read_dir` should also be unchanged.
return true;
}
}
}
}
false
}
/// Tells whether the child named `basename` should be visited according to
/// `set`, the matcher's answer for the parent directory.
fn should_visit(set: &VisitChildrenSet, basename: &HgPath) -> bool {
    match set {
        VisitChildrenSet::Empty => false,
        VisitChildrenSet::Set(wanted_names) => wanted_names.contains(basename),
        VisitChildrenSet::This | VisitChildrenSet::Recursive => true,
    }
}
/// Traverses a directory that exists on the filesystem, pairing up its
/// entries with the child nodes `dirstate_nodes` of the corresponding
/// dirstate directory, and recursing into each child.
///
/// Returns whether all child entries of the filesystem directory have a
/// corresponding dirstate node or are ignored.
///
/// `Ok(false)` is also returned when the matcher has nothing to visit under
/// this directory, when `read_dir` was skipped (so we cannot tell), or when
/// `read_dir` failed.
fn traverse_fs_directory_and_dirstate<'ancestor>(
&self,
has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
directory_entry: &DirEntry,
cached_directory_mtime: Option<TruncatedTimestamp>,
is_at_repo_root: bool,
) -> Result<bool, DirstateV2ParseError> {
let children_set = self.matcher.visit_children_set(directory_hg_path);
if let VisitChildrenSet::Empty = children_set {
return Ok(false);
}
if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
// Fast path: no `read_dir`, only stat the children that the dirstate
// knows about.
dirstate_nodes
.par_iter()
.map(|dirstate_node| {
let fs_path = &directory_entry.fs_path;
let basename =
dirstate_node.base_name(self.dmap.on_disk)?.as_bytes();
let fs_path = fs_path.join(get_path_from_bytes(basename));
if !Self::should_visit(
&children_set,
HgPath::new(basename),
) {
return Ok(());
}
match std::fs::symlink_metadata(&fs_path) {
Ok(fs_metadata) => {
let file_type = fs_metadata.file_type().into();
let entry = DirEntry {
hg_path: Cow::Borrowed(
dirstate_node
.full_path(self.dmap.on_disk)?,
),
fs_path: Cow::Borrowed(&fs_path),
symlink_metadata: Some(fs_metadata),
file_type,
};
self.traverse_fs_and_dirstate(
&entry,
dirstate_node,
has_ignored_ancestor,
)
}
// Nothing at that path on the filesystem anymore
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
self.traverse_dirstate_only(dirstate_node)
}
// Any other error is reported as a bad match for that path
Err(error) => {
let hg_path =
dirstate_node.full_path(self.dmap.on_disk)?;
self.io_error(error, hg_path);
Ok(())
}
}
})
.collect::<Result<(), _>>()?;
// We don’t know, so conservatively say this isn’t the case
let children_all_have_dirstate_node_or_are_ignored = false;
return Ok(children_all_have_dirstate_node_or_are_ignored);
}
let readdir_succeeded;
let mut fs_entries = if let Ok(entries) = self.read_dir(
directory_hg_path,
&directory_entry.fs_path,
is_at_repo_root,
) {
readdir_succeeded = true;
entries
} else {
// Treat an unreadable directory (typically because of insufficient
// permissions) like an empty directory. `self.read_dir` has
// already called `self.io_error` so a warning will be emitted.
// We still need to remember that there was an error so that we
// know not to cache this result.
readdir_succeeded = false;
Vec::new()
};
// `merge_join_by` requires both its input iterators to be sorted:
let dirstate_nodes = dirstate_nodes.sorted();
// `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
// https://github.com/rust-lang/rust/issues/34162
fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
// Propagate here any error that would happen inside the comparison
// callback below
for dirstate_node in &dirstate_nodes {
dirstate_node.base_name(self.dmap.on_disk)?;
}
itertools::merge_join_by(
dirstate_nodes,
&fs_entries,
|dirstate_node, fs_entry| {
// This `unwrap` never panics because we already propagated
// those errors above
dirstate_node
.base_name(self.dmap.on_disk)
.unwrap()
.cmp(&fs_entry.hg_path)
},
)
.par_bridge()
.map(|pair| {
use itertools::EitherOrBoth::*;
// `Left`: only in the dirstate, `Right`: only on the filesystem,
// `Both`: present in both.
let basename = match &pair {
Left(dirstate_node) | Both(dirstate_node, _) => HgPath::new(
dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
),
Right(fs_entry) => &fs_entry.hg_path,
};
if !Self::should_visit(&children_set, basename) {
return Ok(false);
}
let has_dirstate_node_or_is_ignored = match pair {
Both(dirstate_node, fs_entry) => {
self.traverse_fs_and_dirstate(
fs_entry,
dirstate_node,
has_ignored_ancestor,
)?;
true
}
Left(dirstate_node) => {
self.traverse_dirstate_only(dirstate_node)?;
true
}
Right(fs_entry) => self.traverse_fs_only(
has_ignored_ancestor.force(&self.ignore_fn),
directory_hg_path,
fs_entry,
),
};
Ok(has_dirstate_node_or_is_ignored)
})
// The answer for the directory is the conjunction of the answers for
// all of its children, and `false` if `read_dir` failed.
.try_reduce(|| true, |a, b| Ok(a && b))
.map(|res| res && readdir_succeeded)
}
/// A path exists both on the filesystem (`fs_entry`) and as a node in the
/// dirstate tree (`dirstate_node`).
///
/// For a filesystem directory, recurses into it and then considers caching
/// its mtime. For a file or symlink matched by the matcher, reports the
/// state derived from the dirstate entry; any child nodes of the dirstate
/// node are then handled as having no filesystem counterpart.
fn traverse_fs_and_dirstate<'ancestor>(
&self,
fs_entry: &DirEntry,
dirstate_node: NodeRef<'tree, 'on_disk>,
has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
) -> Result<(), DirstateV2ParseError> {
let outdated_dircache =
self.check_for_outdated_directory_cache(&dirstate_node)?;
let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
if !file_or_symlink {
// If we previously had a file here, it was removed (with
// `hg rm` or similar) or deleted before it could be
// replaced by a directory or something else.
self.mark_removed_or_deleted_if_file(&dirstate_node)?;
}
// Special files (devices, sockets, ...) are only reported as bad when
// they were matched exactly.
if let Some(bad_type) = fs_entry.is_bad() {
if self.matcher.exact_match(hg_path) {
let path = dirstate_node.full_path(self.dmap.on_disk)?;
self.outcome.lock().unwrap().bad.push((
path.to_owned().into(),
BadMatch::BadType(bad_type),
))
}
}
if fs_entry.is_dir() {
if self.options.collect_traversed_dirs {
self.outcome
.lock()
.unwrap()
.traversed
.push(hg_path.detach_from_tree())
}
// Chain link for this directory, handed to its children
let is_ignored = HasIgnoredAncestor::create(
Some(has_ignored_ancestor),
hg_path,
);
let is_at_repo_root = false;
let children_all_have_dirstate_node_or_are_ignored = self
.traverse_fs_directory_and_dirstate(
&is_ignored,
dirstate_node.children(self.dmap.on_disk)?,
hg_path,
fs_entry,
dirstate_node.cached_directory_mtime()?,
is_at_repo_root,
)?;
self.maybe_save_directory_mtime(
children_all_have_dirstate_node_or_are_ignored,
fs_entry,
dirstate_node,
outdated_dircache,
)?
} else {
if file_or_symlink && self.matcher.matches(hg_path) {
if let Some(entry) = dirstate_node.entry()? {
if !entry.any_tracked() {
// Forward-compat if we start tracking unknown/ignored
// files for caching reasons
self.mark_unknown_or_ignored(
has_ignored_ancestor.force(&self.ignore_fn),
hg_path,
);
}
if entry.added() {
self.push_outcome(Outcome::Added, &dirstate_node)?;
} else if entry.removed() {
self.push_outcome(Outcome::Removed, &dirstate_node)?;
} else if entry.modified() {
self.push_outcome(Outcome::Modified, &dirstate_node)?;
} else {
self.handle_normal_file(&dirstate_node, fs_entry)?;
}
} else {
// `node.entry.is_none()` indicates a "directory"
// node, but the filesystem has a file
self.mark_unknown_or_ignored(
has_ignored_ancestor.force(&self.ignore_fn),
hg_path,
);
}
}
// The filesystem has no directory here, so nothing below this node can
// exist on the filesystem.
for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
{
self.traverse_dirstate_only(child_node)?
}
}
Ok(())
}
/// Save directory mtime if applicable.
///
/// Nothing is saved unless `children_all_have_dirstate_node_or_are_ignored`
/// is `true`, the time at the start of `status()` is known, and the
/// directory's mtime is reliable with respect to that start time.
///
/// `outdated_directory_cache` is `true` if we've just invalidated the
/// cache for this directory in `check_for_outdated_directory_cache`,
/// which forces the update.
fn maybe_save_directory_mtime(
&self,
children_all_have_dirstate_node_or_are_ignored: bool,
directory_entry: &DirEntry,
dirstate_node: NodeRef<'tree, 'on_disk>,
outdated_directory_cache: bool,
) -> Result<(), DirstateV2ParseError> {
if !children_all_have_dirstate_node_or_are_ignored {
return Ok(());
}
// All filesystem directory entries from `read_dir` have a
// corresponding node in the dirstate, so we can reconstitute the
// names of those entries without calling `read_dir` again.
// TODO: use let-else here and below when available:
// https://github.com/rust-lang/rust/issues/87335
let status_start = if let Some(status_start) =
&self.filesystem_time_at_status_start
{
status_start
} else {
return Ok(());
};
// Although the Rust standard library’s `SystemTime` type
// has nanosecond precision, the times reported for a
// directory’s (or file’s) modified time may have lower
// resolution based on the filesystem (for example ext3
// only stores integer seconds), kernel (see
// https://stackoverflow.com/a/14393315/1162888), etc.
let metadata = match directory_entry.symlink_metadata() {
Ok(meta) => meta,
Err(_) => return Ok(()),
};
let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of(
&metadata,
status_start,
) {
Ok(Some(directory_mtime)) => directory_mtime,
Ok(None) => {
// The directory was modified too recently,
// don’t cache its `read_dir` results.
//
// 1. A change to this directory (direct child was added or
// removed) causes its mtime to be set (possibly truncated)
// to `directory_mtime`
// 2. This `status` algorithm calls `read_dir`
// 3. Another change is made to the same directory so
// that calling `read_dir` again would give different
// results, but soon enough after 1. that the mtime stays
// the same
//
// On a system where the time resolution is poor, this
// scenario is not unlikely if all three steps are caused
// by the same script.
return Ok(());
}
Err(_) => {
// OS/libc does not support mtime?
return Ok(());
}
};
// We’ve observed (through `status_start`) that time has
// “progressed” since `directory_mtime`, so any further
// change to this directory is extremely likely to cause a
// different mtime.
//
// Having the same mtime again is not entirely impossible
// since the system clock is not monotonic. It could jump
// backward to some point before `directory_mtime`, then a
// directory change could potentially happen during exactly
// the wrong tick.
//
// We deem this scenario (unlike the previous one) to be
// unlikely enough in practice.
let is_up_to_date = if let Some(cached) =
dirstate_node.cached_directory_mtime()?
{
!outdated_directory_cache && cached.likely_equal(directory_mtime)
} else {
false
};
// Only queue a new mtime when the dirstate does not already hold an
// equivalent one.
if !is_up_to_date {
let hg_path = dirstate_node
.full_path_borrowed(self.dmap.on_disk)?
.detach_from_tree();
self.new_cacheable_directories
.lock()
.unwrap()
.push((hg_path, directory_mtime))
}
Ok(())
}
/// A file that is clean in the dirstate was found in the filesystem.
///
/// Compares the dirstate entry with the filesystem metadata and reports
/// the file as modified, unsure, or (when `list_clean` is set) clean.
/// Reports nothing if the filesystem metadata cannot be read.
///
/// # Panics
///
/// Panics if `dirstate_node` has no entry, or if an mtime comparison is
/// needed and the mtime of the file cannot be obtained.
fn handle_normal_file(
    &self,
    dirstate_node: &NodeRef<'tree, 'on_disk>,
    fs_entry: &DirEntry,
) -> Result<(), DirstateV2ParseError> {
    // Keep the low 31 bits
    fn low_31_bits(value: u64) -> i32 {
        (value & 0x7FFF_FFFF) as i32
    }

    let fs_metadata = if let Ok(meta) = fs_entry.symlink_metadata() {
        meta
    } else {
        return Ok(());
    };
    let entry = dirstate_node
        .entry()?
        .expect("handle_normal_file called with entry-less node");

    let size = entry.size();
    let size_changed = size != low_31_bits(fs_metadata.len());
    // Kept lazy: only evaluated when the size comparison is inconclusive.
    let mode_changed =
        || self.options.check_exec && entry.mode_changed(&fs_metadata);

    if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
        // issue6456: Size returned may be longer due to encryption
        // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
        return self.push_outcome(Outcome::Unsure, dirstate_node);
    }
    if dirstate_node.has_copy_source()
        || entry.is_from_other_parent()
        || (size >= 0 && (size_changed || mode_changed()))
    {
        return self.push_outcome(Outcome::Modified, dirstate_node);
    }

    let mtime_looks_clean = match entry.truncated_mtime() {
        Some(dirstate_mtime) => {
            // There might be a change in the future if for example the
            // internal clock becomes off while the process runs, but in
            // that case the user would face far worse issues and there is
            // nothing we can really do.
            TruncatedTimestamp::for_mtime_of(&fs_metadata)
                .expect("OS/libc does not support mtime?")
                .likely_equal(dirstate_mtime)
        }
        // No mtime in the dirstate entry
        None => false,
    };
    if !mtime_looks_clean {
        return self.push_outcome(Outcome::Unsure, dirstate_node);
    }
    if self.options.list_clean {
        return self.push_outcome(Outcome::Clean, dirstate_node);
    }
    Ok(())
}
/// A node in the dirstate tree has no corresponding filesystem entry.
///
/// Handles the node itself, then recurses over all of its children in
/// parallel.
fn traverse_dirstate_only(
    &self,
    dirstate_node: NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
    self.check_for_outdated_directory_cache(&dirstate_node)?;
    self.mark_removed_or_deleted_if_file(&dirstate_node)?;
    let children = dirstate_node.children(self.dmap.on_disk)?;
    children
        .par_iter()
        .try_for_each(|child_node| self.traverse_dirstate_only(child_node))
}
/// A node in the dirstate tree has no corresponding *file* on the
/// filesystem
///
/// Reports the node as removed or deleted if it has a tracked entry and
/// the matcher matches its path. Does nothing on a "directory" node.
fn mark_removed_or_deleted_if_file(
    &self,
    dirstate_node: &NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
    let entry = match dirstate_node.entry()? {
        Some(entry) if entry.any_tracked() => entry,
        // Either a "directory" node, or an entry that is not tracked
        // (future-compat for when we start storing ignored and unknown
        // files for caching reasons).
        _ => return Ok(()),
    };
    let path = dirstate_node.full_path(self.dmap.on_disk)?;
    if !self.matcher.matches(path) {
        return Ok(());
    }
    let which = if entry.removed() {
        Outcome::Removed
    } else {
        Outcome::Deleted
    };
    self.push_outcome(which, dirstate_node)
}
/// Something in the filesystem has no corresponding dirstate node
///
/// Returns whether that path is ignored. Special files (neither directory,
/// plain file nor symlink) count as ignored; files that the matcher does
/// not match count as not ignored.
fn traverse_fs_only(
    &self,
    has_ignored_ancestor: bool,
    directory_hg_path: &HgPath,
    fs_entry: &DirEntry,
) -> bool {
    let hg_path = directory_hg_path.join(&fs_entry.hg_path);

    if fs_entry.is_dir() {
        let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
        // Descendants of an ignored directory are all ignored, so they
        // only matter when listing ignored files. Descendants of an
        // unknown directory may be either unknown or ignored.
        let traverse_children = self.options.list_ignored
            || (!is_ignored && self.options.list_unknown);
        if !traverse_children {
            return is_ignored;
        }
        let is_at_repo_root = false;
        if let Ok(children_fs_entries) =
            self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
        {
            children_fs_entries.par_iter().for_each(|child_fs_entry| {
                self.traverse_fs_only(is_ignored, &hg_path, child_fs_entry);
            })
        }
        if self.options.collect_traversed_dirs {
            self.outcome.lock().unwrap().traversed.push(hg_path.into())
        }
        return is_ignored;
    }

    if !(fs_entry.is_file() || fs_entry.is_symlink()) {
        // This is neither a directory, a plain file, or a symlink.
        // Treat it like an ignored file.
        return true;
    }
    if !self.matcher.matches(&hg_path) {
        // We haven’t computed whether this path is ignored. It
        // might not be, and a future run of status might have a
        // different matcher that matches it. So treat it as not
        // ignored. That is, inhibit readdir caching of the parent
        // directory.
        return false;
    }
    self.mark_unknown_or_ignored(
        has_ignored_ancestor,
        &BorrowedPath::InMemory(&hg_path),
    )
}
/// Classifies `hg_path` as ignored or unknown and records it in the
/// matching list if the options ask for that list.
///
/// Returns whether that path is ignored
fn mark_unknown_or_ignored(
    &self,
    has_ignored_ancestor: bool,
    hg_path: &BorrowedPath<'_, 'on_disk>,
) -> bool {
    let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
    let (wanted, which) = if is_ignored {
        (self.options.list_ignored, Outcome::Ignored)
    } else {
        (self.options.list_unknown, Outcome::Unknown)
    };
    if wanted {
        self.push_outcome_without_copy_source(which, hg_path)
    }
    is_ignored
}
}
/// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
/// care about.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum FakeFileType {
/// Regular file
File,
/// Directory
Directory,
/// Symbolic link
Symlink,
/// Anything else, with the specific kind
BadType(BadType),
}
impl From<std::fs::FileType> for FakeFileType {
    /// Maps a real file type to its emulated counterpart. Anything that is
    /// not a directory, regular file or symlink becomes a `BadType`.
    fn from(f: std::fs::FileType) -> Self {
        if f.is_dir() {
            return Self::Directory;
        }
        if f.is_file() {
            return Self::File;
        }
        if f.is_symlink() {
            return Self::Symlink;
        }
        let bad_type = if f.is_fifo() {
            BadType::FIFO
        } else if f.is_block_device() {
            BadType::BlockDevice
        } else if f.is_char_device() {
            BadType::CharacterDevice
        } else if f.is_socket() {
            BadType::Socket
        } else {
            BadType::Unknown
        };
        Self::BadType(bad_type)
    }
}
/// A filesystem entry met during the traversal, with the information the
/// `status` algorithm needs about it.
struct DirEntry<'a> {
/// Path as stored in the dirstate, or just the filename for optimization.
hg_path: HgPathCow<'a>,
/// Filesystem path
fs_path: Cow<'a, Path>,
/// Metadata of the entry if already known. When `None`, the
/// `symlink_metadata` method queries the filesystem on each call.
symlink_metadata: Option<std::fs::Metadata>,
/// Already computed for ergonomics.
file_type: FakeFileType,
}
impl<'a> DirEntry<'a> {
/// Returns **unsorted** entries in the given directory, with name,
/// metadata and file type.
///
/// If a `.hg` sub-directory is encountered:
///
/// * At the repository root, ignore that sub-directory
/// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
/// list instead.
fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
// `read_dir` returns a "not found" error for the empty path
let at_cwd = path == Path::new("");
let read_dir_path = if at_cwd { Path::new(".") } else { path };
let mut results = Vec::new();
for entry in read_dir_path.read_dir()? {
let entry = entry?;
let file_type = match entry.file_type() {
Ok(v) => v,
Err(e) => {
// race with file deletion?
if e.kind() == std::io::ErrorKind::NotFound {
continue;
} else {
return Err(e);
}
}
};
let file_name = entry.file_name();
// FIXME don't do this when cached
if file_name == ".hg" {
if is_at_repo_root {
// Skip the repo’s own .hg (might be a symlink)
continue;
} else if file_type.is_dir() {
// A .hg sub-directory at another location means a subrepo,
// skip it entirely.
return Ok(Vec::new());
}
}
let full_path = if at_cwd {
file_name.clone().into()
} else {
entry.path()
};
let filename =
Cow::Owned(get_bytes_from_os_string(file_name).into());
let file_type = FakeFileType::from(file_type);
results.push(DirEntry {
hg_path: filename,
fs_path: Cow::Owned(full_path.to_path_buf()),
symlink_metadata: None,
file_type,
})
}
Ok(results)
}
fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
match &self.symlink_metadata {
Some(meta) => Ok(meta.clone()),
None => std::fs::symlink_metadata(&self.fs_path),
}
}
fn is_dir(&self) -> bool {
self.file_type == FakeFileType::Directory
}
fn is_file(&self) -> bool {
self.file_type == FakeFileType::File
}
fn is_symlink(&self) -> bool {
self.file_type == FakeFileType::Symlink
}
fn is_bad(&self) -> Option<BadType> {
match self.file_type {
FakeFileType::BadType(ty) => Some(ty),
_ => None,
}
}
}