// status.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Rust implementation of dirstate.status (dirstate.py).
//! It is currently missing a lot of functionality compared to the Python one
//! and will only be triggered in narrow cases.
use crate::dirstate::dirstate_map::BorrowedPath;
use crate::dirstate::dirstate_map::ChildNodesRef;
use crate::dirstate::dirstate_map::DirstateMap;
use crate::dirstate::dirstate_map::DirstateVersion;
use crate::dirstate::dirstate_map::NodeRef;
use crate::dirstate::entry::TruncatedTimestamp;
use crate::dirstate::on_disk::DirstateV2ParseError;
use crate::filepatterns::PatternError;
use crate::filepatterns::PatternFileWarning;
use crate::matchers::get_ignore_function;
use crate::matchers::{Matcher, VisitChildrenSet};
use crate::utils::files::filesystem_now;
use crate::utils::files::get_bytes_from_os_string;
use crate::utils::files::get_bytes_from_path;
use crate::utils::files::get_path_from_bytes;
use crate::utils::hg_path::hg_path_to_path_buf;
use crate::utils::hg_path::HgPath;
use crate::utils::hg_path::HgPathError;
use once_cell::sync::OnceCell;
use rayon::prelude::*;
use sha1::{Digest, Sha1};
use std::io;
use std::os::unix::prelude::FileTypeExt;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Mutex;
use std::{borrow::Cow, fmt};
/// Wrong type of file from a `BadMatch`
/// Note: a lot of those don't exist on all platforms.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BadType {
CharacterDevice,
BlockDevice,
FIFO,
Socket,
Directory,
Unknown,
}
impl fmt::Display for BadType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(match self {
BadType::CharacterDevice => "character device",
BadType::BlockDevice => "block device",
BadType::FIFO => "fifo",
BadType::Socket => "socket",
BadType::Directory => "directory",
BadType::Unknown => "unknown",
})
}
}
/// Was explicitly matched but cannot be found/accessed
#[derive(Debug, Copy, Clone)]
pub enum BadMatch {
OsError(i32),
BadType(BadType),
}
/// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait + 'static>`, so add
/// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
pub type IgnoreFnType<'a> =
Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
/// We have a good mix of owned (from directory traversal) and borrowed (from
/// the dirstate/explicit) paths; this comes up a lot.
pub type HgPathCow<'a> = Cow<'a, HgPath>;
#[derive(Debug, Copy, Clone)]
pub struct StatusOptions {
/// Whether we are on a filesystem with UNIX-like exec flags
pub check_exec: bool,
pub list_clean: bool,
pub list_unknown: bool,
pub list_ignored: bool,
/// Whether to populate `StatusPath::copy_source`
pub list_copies: bool,
/// Whether to collect traversed dirs for applying a callback later.
/// Used by `hg purge` for example.
pub collect_traversed_dirs: bool,
}
#[derive(Default)]
pub struct DirstateStatus<'a> {
/// The current time at the start of the `status()` algorithm, as measured
/// and possibly truncated by the filesystem.
pub filesystem_time_at_status_start: Option<TruncatedTimestamp>,
/// Tracked files whose contents have changed since the parent revision
pub modified: Vec<StatusPath<'a>>,
/// Newly-tracked files that were not present in the parent
pub added: Vec<StatusPath<'a>>,
/// Previously-tracked files that have been (re)moved with an hg command
pub removed: Vec<StatusPath<'a>>,
/// (Still) tracked files that are missing, (re)moved with a non-hg
/// command
pub deleted: Vec<StatusPath<'a>>,
/// Tracked files that are up to date with the parent.
/// Only populated if `StatusOptions::list_clean` is true.
pub clean: Vec<StatusPath<'a>>,
/// Files in the working directory that are ignored with `.hgignore`.
/// Only populated if `StatusOptions::list_ignored` is true.
pub ignored: Vec<StatusPath<'a>>,
/// Files in the working directory that are neither tracked nor ignored.
/// Only populated if `StatusOptions::list_unknown` is true.
pub unknown: Vec<StatusPath<'a>>,
/// Was explicitly matched but cannot be found/accessed
pub bad: Vec<(HgPathCow<'a>, BadMatch)>,
/// Either clean or modified, but we can’t tell from filesystem metadata
/// alone. The file contents need to be read and compared with the
/// version in the parent.
pub unsure: Vec<StatusPath<'a>>,
/// Only filled if `collect_traversed_dirs` is `true`
pub traversed: Vec<HgPathCow<'a>>,
/// Whether `status()` made changes to the `DirstateMap` that should be
/// written back to disk
pub dirty: bool,
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct StatusPath<'a> {
pub path: HgPathCow<'a>,
pub copy_source: Option<HgPathCow<'a>>,
}
#[derive(Debug, derive_more::From)]
pub enum StatusError {
/// An invalid path that cannot be represented in Mercurial was found
Path(HgPathError),
/// An invalid "ignore" pattern was found
Pattern(PatternError),
/// Corrupted dirstate
DirstateV2ParseError(DirstateV2ParseError),
}
impl fmt::Display for StatusError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
StatusError::Path(error) => error.fmt(f),
StatusError::Pattern(error) => error.fmt(f),
StatusError::DirstateV2ParseError(_) => {
f.write_str("dirstate-v2 parse error")
}
}
}
}
/// Returns the status of the working directory compared to its parent
/// changeset.
///
/// This algorithm is based on traversing the filesystem tree (`fs` in function
/// and variable names) and dirstate tree at the same time. The core of this
/// traversal is the recursive `traverse_fs_directory_and_dirstate` function
/// and its use of `itertools::merge_join_by`. When reaching a path that only
/// exists in one of the two trees, depending on information requested by
/// `options` we may need to traverse the remaining subtree.
#[logging_timer::time("trace")]
pub fn status<'dirstate>(
dmap: &'dirstate mut DirstateMap,
matcher: &(dyn Matcher + Sync),
root_dir: PathBuf,
ignore_files: Vec<PathBuf>,
options: StatusOptions,
) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
{
// Also cap the number of threads for a Python caller of this function,
// but don't complain if the global threadpool has already been set,
// since this code path is also used by `rhg`, which calls this early.
let _ = crate::utils::cap_default_rayon_threads();
let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
if options.list_ignored || options.list_unknown {
let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
DirstateVersion::V1 => {
let (ignore_fn, warnings) = get_ignore_function(
ignore_files,
&root_dir,
&mut |_source, _pattern_bytes| {},
)?;
(ignore_fn, warnings, None)
}
DirstateVersion::V2 => {
let mut hasher = Sha1::new();
let (ignore_fn, warnings) = get_ignore_function(
ignore_files,
&root_dir,
&mut |source, pattern_bytes| {
// If inside the repo, use the relative version to
// make it deterministic inside tests.
// The performance hit should be negligible.
let source = source
.strip_prefix(&root_dir)
.unwrap_or(source);
let source = get_bytes_from_path(source);
let mut subhasher = Sha1::new();
subhasher.update(pattern_bytes);
let patterns_hash = subhasher.finalize();
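// Feed one record per ignore file into the overall hasher, in the
// form `<relative source> <sha1 of its patterns>\n`; the resulting
// digest is compared below against the hash stored in the dirstate
// to detect pattern changes.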
hasher.update(source);
hasher.update(b" ");
hasher.update(patterns_hash);
hasher.update(b"\n");
},
)?;
let new_hash = *hasher.finalize().as_ref();
let changed = new_hash != dmap.ignore_patterns_hash;
dmap.ignore_patterns_hash = new_hash;
(ignore_fn, warnings, Some(changed))
}
};
(ignore_fn, warnings, changed)
} else {
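// Neither ignored nor unknown files are requested: a constant-true
// "ignored" function lets traversal skip filesystem-only paths
// without reading any ignore patterns.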
(Box::new(|&_| true), vec![], None)
};
let filesystem_time_at_status_start =
filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
// If the repository is under the current directory, prefer using a
// relative path, so the kernel needs to traverse fewer directories in every
// call to `read_dir` or `symlink_metadata`.
// This is effective in the common case where the current directory is the
// repository root.
// TODO: Better yet would be to use libc functions like `openat` and
// `fstatat` to remove such repeated traversals entirely, but the standard
// library does not provide APIs based on those.
// Maybe with a crate like https://crates.io/crates/openat instead?
let root_dir = if let Some(relative) = std::env::current_dir()
.ok()
.and_then(|cwd| root_dir.strip_prefix(cwd).ok())
{
relative
} else {
&root_dir
};
let outcome = DirstateStatus {
filesystem_time_at_status_start,
..Default::default()
};
let common = StatusCommon {
dmap,
options,
matcher,
ignore_fn,
outcome: Mutex::new(outcome),
ignore_patterns_have_changed: patterns_changed,
new_cacheable_directories: Default::default(),
outdated_cached_directories: Default::default(),
filesystem_time_at_status_start,
};
let is_at_repo_root = true;
let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
let root_cached_mtime = None;
// If the path we have for the repository root is a symlink, do follow it.
// (As opposed to symlinks within the working directory which are not
// followed, using `std::fs::symlink_metadata`.)
common.traverse_fs_directory_and_dirstate(
&has_ignored_ancestor,
dmap.root.as_ref(),
hg_path,
&DirEntry {
hg_path: Cow::Borrowed(HgPath::new(b"")),
fs_path: Cow::Borrowed(root_dir),
symlink_metadata: None,
file_type: FakeFileType::Directory,
},
root_cached_mtime,
is_at_repo_root,
)?;
if let Some(file_set) = common.matcher.file_set() {
for file in file_set {
if !file.is_empty() && !dmap.has_node(file)? {
let path = hg_path_to_path_buf(file)?;
if let io::Result::Err(error) =
root_dir.join(path).symlink_metadata()
{
common.io_error(error, file)
}
}
}
}
let mut outcome = common.outcome.into_inner().unwrap();
let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
let outdated = common.outdated_cached_directories.into_inner().unwrap();
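// The dirstate needs writing back if the ignore hash changed, if
// cached directory mtimes were invalidated, or if new directory
// mtimes can be cached (the readdir cache only exists in
// dirstate-v2).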
outcome.dirty = common.ignore_patterns_have_changed == Some(true)
|| !outdated.is_empty()
|| (!new_cacheable.is_empty()
&& dmap.dirstate_version == DirstateVersion::V2);
// Remove outdated mtimes before adding new mtimes, in case a given
// directory appears in both lists
for path in &outdated {
dmap.clear_cached_mtime(path)?;
}
for (path, mtime) in &new_cacheable {
dmap.set_cached_mtime(path, *mtime)?;
}
Ok((outcome, warnings))
}
/// Bag of random things needed by various parts of the algorithm. Reduces the
/// number of parameters passed to functions.
struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
dmap: &'tree DirstateMap<'on_disk>,
options: StatusOptions,
matcher: &'a (dyn Matcher + Sync),
ignore_fn: IgnoreFnType<'a>,
outcome: Mutex<DirstateStatus<'on_disk>>,
/// New timestamps of directories to be used for caching their readdirs
new_cacheable_directories:
Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
/// Used to invalidate the readdir cache of directories
outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
/// Whether ignore files like `.hgignore` have changed since the previous
/// time a `status()` call wrote their hash to the dirstate. `None` means
/// we don’t know as this run doesn’t list either ignored or unknown files
/// and therefore isn’t reading `.hgignore`.
ignore_patterns_have_changed: Option<bool>,
/// The current time at the start of the `status()` algorithm, as measured
/// and possibly truncated by the filesystem.
filesystem_time_at_status_start: Option<TruncatedTimestamp>,
}
enum Outcome {
Modified,
Added,
Removed,
Deleted,
Clean,
Ignored,
Unknown,
Unsure,
}
/// Lazy computation of whether a given path has an hgignored
/// ancestor.
struct HasIgnoredAncestor<'a> {
/// `path` and `parent` constitute the inputs to the computation,
/// `cache` stores the outcome.
path: &'a HgPath,
parent: Option<&'a HasIgnoredAncestor<'a>>,
cache: OnceCell<bool>,
}
impl<'a> HasIgnoredAncestor<'a> {
fn create(
parent: Option<&'a HasIgnoredAncestor<'a>>,
path: &'a HgPath,
) -> HasIgnoredAncestor<'a> {
Self {
path,
parent,
cache: OnceCell::new(),
}
}
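/// Whether this path or any of its ancestors is ignored, memoized
/// in `cache`. The repository root (a node without a parent) is
/// never considered ignored.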
fn force(&self, ignore_fn: &IgnoreFnType<'_>) -> bool {
match self.parent {
None => false,
Some(parent) => {
*(self.cache.get_or_init(|| {
parent.force(ignore_fn) || ignore_fn(self.path)
}))
}
}
}
}
impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
fn push_outcome(
&self,
which: Outcome,
dirstate_node: &NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
let path = dirstate_node
.full_path_borrowed(self.dmap.on_disk)?
.detach_from_tree();
let copy_source = if self.options.list_copies {
dirstate_node
.copy_source_borrowed(self.dmap.on_disk)?
.map(|source| source.detach_from_tree())
} else {
None
};
self.push_outcome_common(which, path, copy_source);
Ok(())
}
fn push_outcome_without_copy_source(
&self,
which: Outcome,
path: &BorrowedPath<'_, 'on_disk>,
) {
self.push_outcome_common(which, path.detach_from_tree(), None)
}
fn push_outcome_common(
&self,
which: Outcome,
path: HgPathCow<'on_disk>,
copy_source: Option<HgPathCow<'on_disk>>,
) {
let mut outcome = self.outcome.lock().unwrap();
let vec = match which {
Outcome::Modified => &mut outcome.modified,
Outcome::Added => &mut outcome.added,
Outcome::Removed => &mut outcome.removed,
Outcome::Deleted => &mut outcome.deleted,
Outcome::Clean => &mut outcome.clean,
Outcome::Ignored => &mut outcome.ignored,
Outcome::Unknown => &mut outcome.unknown,
Outcome::Unsure => &mut outcome.unsure,
};
vec.push(StatusPath { path, copy_source });
}
fn read_dir(
&self,
hg_path: &HgPath,
fs_path: &Path,
is_at_repo_root: bool,
) -> Result<Vec<DirEntry>, ()> {
DirEntry::read_dir(fs_path, is_at_repo_root)
.map_err(|error| self.io_error(error, hg_path))
}
fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
let errno = error.raw_os_error().expect("expected real OS error");
self.outcome
.lock()
.unwrap()
.bad
.push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
}
fn check_for_outdated_directory_cache(
&self,
dirstate_node: &NodeRef<'tree, 'on_disk>,
) -> Result<bool, DirstateV2ParseError> {
if self.ignore_patterns_have_changed == Some(true)
&& dirstate_node.cached_directory_mtime()?.is_some()
{
self.outdated_cached_directories.lock().unwrap().push(
dirstate_node
.full_path_borrowed(self.dmap.on_disk)?
.detach_from_tree(),
);
return Ok(true);
}
Ok(false)
}
/// If this returns `true`, we can get accurate results by only using
/// `symlink_metadata` for child nodes that exist in the dirstate, and
/// we don’t need to call `read_dir`.
fn can_skip_fs_readdir(
&self,
directory_entry: &DirEntry,
cached_directory_mtime: Option<TruncatedTimestamp>,
) -> bool {
if !self.options.list_unknown && !self.options.list_ignored {
// All states that we care about listing have corresponding
// dirstate entries.
// This happens for example with `hg status -mard`.
return true;
}
if !self.options.list_ignored
&& self.ignore_patterns_have_changed == Some(false)
{
if let Some(cached_mtime) = cached_directory_mtime {
// The dirstate contains a cached mtime for this directory, set
// by a previous run of the `status` algorithm which found this
// directory eligible for `read_dir` caching.
if let Ok(meta) = directory_entry.symlink_metadata() {
if cached_mtime
.likely_equal_to_mtime_of(&meta)
.unwrap_or(false)
{
// The mtime of that directory has not changed
// since then, which means that the results of
// `read_dir` should also be unchanged.
return true;
}
}
}
}
false
}
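/// Whether a child with this `basename` should be visited, according
/// to the matcher's `VisitChildrenSet` for the current directory.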
fn should_visit(set: &VisitChildrenSet, basename: &HgPath) -> bool {
match set {
VisitChildrenSet::This | VisitChildrenSet::Recursive => true,
VisitChildrenSet::Empty => false,
VisitChildrenSet::Set(children_to_visit) => {
children_to_visit.contains(basename)
}
}
}
/// Returns whether all child entries of the filesystem directory have a
/// corresponding dirstate node or are ignored.
fn traverse_fs_directory_and_dirstate<'ancestor>(
&self,
has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
directory_entry: &DirEntry,
cached_directory_mtime: Option<TruncatedTimestamp>,
is_at_repo_root: bool,
) -> Result<bool, DirstateV2ParseError> {
let children_set = self.matcher.visit_children_set(directory_hg_path);
if let VisitChildrenSet::Empty = children_set {
return Ok(false);
}
if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
dirstate_nodes
.par_iter()
.map(|dirstate_node| {
let fs_path = &directory_entry.fs_path;
let basename =
dirstate_node.base_name(self.dmap.on_disk)?.as_bytes();
let fs_path = fs_path.join(get_path_from_bytes(basename));
if !Self::should_visit(
&children_set,
HgPath::new(basename),
) {
return Ok(());
}
match std::fs::symlink_metadata(&fs_path) {
Ok(fs_metadata) => {
let file_type = fs_metadata.file_type().into();
let entry = DirEntry {
hg_path: Cow::Borrowed(
dirstate_node
.full_path(self.dmap.on_disk)?,
),
fs_path: Cow::Borrowed(&fs_path),
symlink_metadata: Some(fs_metadata),
file_type,
};
self.traverse_fs_and_dirstate(
&entry,
dirstate_node,
has_ignored_ancestor,
)
}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
self.traverse_dirstate_only(dirstate_node)
}
Err(error) => {
let hg_path =
dirstate_node.full_path(self.dmap.on_disk)?;
self.io_error(error, hg_path);
Ok(())
}
}
})
.collect::<Result<(), _>>()?;
// We don’t know, so conservatively say this isn’t the case
let children_all_have_dirstate_node_or_are_ignored = false;
return Ok(children_all_have_dirstate_node_or_are_ignored);
}
let readdir_succeeded;
let mut fs_entries = if let Ok(entries) = self.read_dir(
directory_hg_path,
&directory_entry.fs_path,
is_at_repo_root,
) {
readdir_succeeded = true;
entries
} else {
// Treat an unreadable directory (typically because of insufficient
// permissions) like an empty directory. `self.read_dir` has
// already called `self.io_error` so a warning will be emitted.
// We still need to remember that there was an error so that we
// know not to cache this result.
readdir_succeeded = false;
Vec::new()
};
// `merge_join_by` requires both its input iterators to be sorted:
let dirstate_nodes = dirstate_nodes.sorted();
// `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
// https://github.com/rust-lang/rust/issues/34162
fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
// Propagate here any error that would happen inside the comparison
// callback below
for dirstate_node in &dirstate_nodes {
dirstate_node.base_name(self.dmap.on_disk)?;
}
itertools::merge_join_by(
dirstate_nodes,
&fs_entries,
|dirstate_node, fs_entry| {
// This `unwrap` never panics because we already propagated
// those errors above
dirstate_node
.base_name(self.dmap.on_disk)
.unwrap()
.cmp(&fs_entry.hg_path)
},
)
.par_bridge()
.map(|pair| {
use itertools::EitherOrBoth::*;
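// `Left` is a dirstate-only node, `Right` a filesystem-only entry,
// and `Both` a path present in both trees.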
let basename = match &pair {
Left(dirstate_node) | Both(dirstate_node, _) => HgPath::new(
dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
),
Right(fs_entry) => &fs_entry.hg_path,
};
if !Self::should_visit(&children_set, basename) {
return Ok(false);
}
let has_dirstate_node_or_is_ignored = match pair {
Both(dirstate_node, fs_entry) => {
self.traverse_fs_and_dirstate(
fs_entry,
dirstate_node,
has_ignored_ancestor,
)?;
true
}
Left(dirstate_node) => {
self.traverse_dirstate_only(dirstate_node)?;
true
}
Right(fs_entry) => self.traverse_fs_only(
has_ignored_ancestor.force(&self.ignore_fn),
directory_hg_path,
fs_entry,
),
};
Ok(has_dirstate_node_or_is_ignored)
})
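// AND the per-child results together; a failed `read_dir` also
// disqualifies this directory from mtime caching.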
.try_reduce(|| true, |a, b| Ok(a && b))
.map(|res| res && readdir_succeeded)
}
fn traverse_fs_and_dirstate<'ancestor>(
&self,
fs_entry: &DirEntry,
dirstate_node: NodeRef<'tree, 'on_disk>,
has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
) -> Result<(), DirstateV2ParseError> {
let outdated_dircache =
self.check_for_outdated_directory_cache(&dirstate_node)?;
let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
if !file_or_symlink {
// If we previously had a file here, it was removed (with
// `hg rm` or similar) or deleted before it could be
// replaced by a directory or something else.
self.mark_removed_or_deleted_if_file(&dirstate_node)?;
}
if let Some(bad_type) = fs_entry.is_bad() {
if self.matcher.exact_match(hg_path) {
let path = dirstate_node.full_path(self.dmap.on_disk)?;
self.outcome.lock().unwrap().bad.push((
path.to_owned().into(),
BadMatch::BadType(bad_type),
))
}
}
if fs_entry.is_dir() {
if self.options.collect_traversed_dirs {
self.outcome
.lock()
.unwrap()
.traversed
.push(hg_path.detach_from_tree())
}
let is_ignored = HasIgnoredAncestor::create(
Some(has_ignored_ancestor),
hg_path,
);
let is_at_repo_root = false;
let children_all_have_dirstate_node_or_are_ignored = self
.traverse_fs_directory_and_dirstate(
&is_ignored,
dirstate_node.children(self.dmap.on_disk)?,
hg_path,
fs_entry,
dirstate_node.cached_directory_mtime()?,
is_at_repo_root,
)?;
self.maybe_save_directory_mtime(
children_all_have_dirstate_node_or_are_ignored,
fs_entry,
dirstate_node,
outdated_dircache,
)?
} else {
if file_or_symlink && self.matcher.matches(hg_path) {
if let Some(entry) = dirstate_node.entry()? {
if !entry.any_tracked() {
// Forward-compat if we start tracking unknown/ignored
// files for caching reasons
self.mark_unknown_or_ignored(
has_ignored_ancestor.force(&self.ignore_fn),
hg_path,
);
}
if entry.added() {
self.push_outcome(Outcome::Added, &dirstate_node)?;
} else if entry.removed() {
self.push_outcome(Outcome::Removed, &dirstate_node)?;
} else if entry.modified() {
self.push_outcome(Outcome::Modified, &dirstate_node)?;
} else {
self.handle_normal_file(&dirstate_node, fs_entry)?;
}
} else {
// `node.entry.is_none()` indicates a "directory"
// node, but the filesystem has a file
self.mark_unknown_or_ignored(
has_ignored_ancestor.force(&self.ignore_fn),
hg_path,
);
}
}
for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
{
self.traverse_dirstate_only(child_node)?
}
}
Ok(())
}
/// Save directory mtime if applicable.
///
/// `outdated_directory_cache` is `true` if we've just invalidated the
/// cache for this directory in `check_for_outdated_directory_cache`,
/// which forces the update.
fn maybe_save_directory_mtime(
&self,
children_all_have_dirstate_node_or_are_ignored: bool,
directory_entry: &DirEntry,
dirstate_node: NodeRef<'tree, 'on_disk>,
outdated_directory_cache: bool,
) -> Result<(), DirstateV2ParseError> {
if !children_all_have_dirstate_node_or_are_ignored {
return Ok(());
}
// All filesystem directory entries from `read_dir` have a
// corresponding node in the dirstate, so we can reconstitute the
// names of those entries without calling `read_dir` again.
// TODO: use let-else here and below when available:
// https://github.com/rust-lang/rust/issues/87335
let status_start = if let Some(status_start) =
&self.filesystem_time_at_status_start
{
status_start
} else {
return Ok(());
};
// Although the Rust standard library’s `SystemTime` type
// has nanosecond precision, the times reported for a
// directory’s (or file’s) modified time may have lower
// resolution based on the filesystem (for example ext3
// only stores integer seconds), kernel (see
// https://stackoverflow.com/a/14393315/1162888), etc.
let metadata = match directory_entry.symlink_metadata() {
Ok(meta) => meta,
Err(_) => return Ok(()),
};
let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of(
&metadata,
status_start,
) {
Ok(Some(directory_mtime)) => directory_mtime,
Ok(None) => {
// The directory was modified too recently,
// don’t cache its `read_dir` results.
//
// 1. A change to this directory (a direct child was added or
// removed) causes its mtime to be set (possibly truncated)
// to `directory_mtime`
// 2. This `status` algorithm calls `read_dir`
// 3. Another change is made to the same directory, so that
// calling `read_dir` again would give different results,
// but soon enough after 1. that the mtime stays the same
//
// On a system where the time resolution is poor, this
// scenario is not unlikely if all three steps are caused
// by the same script.
return Ok(());
}
Err(_) => {
// OS/libc does not support mtime?
return Ok(());
}
};
// We’ve observed (through `status_start`) that time has
// “progressed” since `directory_mtime`, so any further
// change to this directory is extremely likely to cause a
// different mtime.
//
// Having the same mtime again is not entirely impossible
// since the system clock is not monotonic. It could jump
// backward to some point before `directory_mtime`, then a
// directory change could potentially happen during exactly
// the wrong tick.
//
// We deem this scenario (unlike the previous one) to be
// unlikely enough in practice.
let is_up_to_date = if let Some(cached) =
dirstate_node.cached_directory_mtime()?
{
!outdated_directory_cache && cached.likely_equal(directory_mtime)
} else {
false
};
if !is_up_to_date {
let hg_path = dirstate_node
.full_path_borrowed(self.dmap.on_disk)?
.detach_from_tree();
self.new_cacheable_directories
.lock()
.unwrap()
.push((hg_path, directory_mtime))
}
Ok(())
}
/// A file that is clean in the dirstate was found in the filesystem
fn handle_normal_file(
&self,
dirstate_node: &NodeRef<'tree, 'on_disk>,
fs_entry: &DirEntry,
) -> Result<(), DirstateV2ParseError> {
// Keep the low 31 bits
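// e.g. `truncate_u64(0x9_1234_5678) == 0x1234_5678`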
fn truncate_u64(value: u64) -> i32 {
(value & 0x7FFF_FFFF) as i32
}
let fs_metadata = match fs_entry.symlink_metadata() {
Ok(meta) => meta,
Err(_) => return Ok(()),
};
let entry = dirstate_node
.entry()?
.expect("handle_normal_file called with entry-less node");
let mode_changed =
|| self.options.check_exec && entry.mode_changed(&fs_metadata);
let size = entry.size();
let size_changed = size != truncate_u64(fs_metadata.len());
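// A negative `size` is a sentinel value, so the on-disk size is only
// compared when `size >= 0`.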
if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
// issue6456: the size returned may be larger due to encryption
// on ext4 fscrypt. TODO: maybe only do this on ext4?
self.push_outcome(Outcome::Unsure, dirstate_node)?
} else if dirstate_node.has_copy_source()
|| entry.is_from_other_parent()
|| (size >= 0 && (size_changed || mode_changed()))
{
self.push_outcome(Outcome::Modified, dirstate_node)?
} else {
let mtime_looks_clean = if let Some(dirstate_mtime) =
entry.truncated_mtime()
{
let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
.expect("OS/libc does not support mtime?");
// There might be a change in the future if, for example, the
// internal clock becomes off while the process runs, but this is
// a case where the issues the user would face would be a lot
// worse and there is nothing we can really do.
fs_mtime.likely_equal(dirstate_mtime)
} else {
// No mtime in the dirstate entry
false
};
if !mtime_looks_clean {
self.push_outcome(Outcome::Unsure, dirstate_node)?
} else if self.options.list_clean {
self.push_outcome(Outcome::Clean, dirstate_node)?
}
}
Ok(())
}
/// A node in the dirstate tree has no corresponding filesystem entry
fn traverse_dirstate_only(
&self,
dirstate_node: NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
self.check_for_outdated_directory_cache(&dirstate_node)?;
self.mark_removed_or_deleted_if_file(&dirstate_node)?;
dirstate_node
.children(self.dmap.on_disk)?
.par_iter()
.map(|child_node| self.traverse_dirstate_only(child_node))
.collect()
}
/// A node in the dirstate tree has no corresponding *file* on the
/// filesystem
///
/// Does nothing on a "directory" node
fn mark_removed_or_deleted_if_file(
&self,
dirstate_node: &NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
if let Some(entry) = dirstate_node.entry()? {
if !entry.any_tracked() {
// Future-compat for when we start storing ignored and unknown
// files for caching reasons
return Ok(());
}
let path = dirstate_node.full_path(self.dmap.on_disk)?;
if self.matcher.matches(path) {
if entry.removed() {
self.push_outcome(Outcome::Removed, dirstate_node)?
} else {
self.push_outcome(Outcome::Deleted, dirstate_node)?
}
}
}
Ok(())
}
/// Something in the filesystem has no corresponding dirstate node
///
/// Returns whether that path is ignored
fn traverse_fs_only(
&self,
has_ignored_ancestor: bool,
directory_hg_path: &HgPath,
fs_entry: &DirEntry,
) -> bool {
let hg_path = directory_hg_path.join(&fs_entry.hg_path);
let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
if fs_entry.is_dir() {
let is_ignored =
has_ignored_ancestor || (self.ignore_fn)(&hg_path);
let traverse_children = if is_ignored {
// Descendants of an ignored directory are all ignored
self.options.list_ignored
} else {
// Descendants of an unknown directory may be either unknown or
// ignored
self.options.list_unknown || self.options.list_ignored
};
if traverse_children {
let is_at_repo_root = false;
if let Ok(children_fs_entries) =
self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
{
children_fs_entries.par_iter().for_each(|child_fs_entry| {
self.traverse_fs_only(
is_ignored,
&hg_path,
child_fs_entry,
);
})
}
if self.options.collect_traversed_dirs {
self.outcome.lock().unwrap().traversed.push(hg_path.into())
}
}
is_ignored
} else if file_or_symlink {
if self.matcher.matches(&hg_path) {
self.mark_unknown_or_ignored(
has_ignored_ancestor,
&BorrowedPath::InMemory(&hg_path),
)
} else {
// We haven’t computed whether this path is ignored. It
// might not be, and a future run of status might have a
// different matcher that matches it. So treat it as not
// ignored. That is, inhibit readdir caching of the parent
// directory.
false
}
} else {
// This is neither a directory, a plain file, nor a symlink.
// Treat it like an ignored file.
true
}
}
/// Returns whether that path is ignored
fn mark_unknown_or_ignored(
&self,
has_ignored_ancestor: bool,
hg_path: &BorrowedPath<'_, 'on_disk>,
) -> bool {
let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
if is_ignored {
if self.options.list_ignored {
self.push_outcome_without_copy_source(
Outcome::Ignored,
hg_path,
)
}
} else if self.options.list_unknown {
self.push_outcome_without_copy_source(Outcome::Unknown, hg_path)
}
is_ignored
}
}
/// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
/// care about.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum FakeFileType {
File,
Directory,
Symlink,
BadType(BadType),
}
impl From<std::fs::FileType> for FakeFileType {
fn from(f: std::fs::FileType) -> Self {
if f.is_dir() {
Self::Directory
} else if f.is_file() {
Self::File
} else if f.is_symlink() {
Self::Symlink
} else if f.is_fifo() {
Self::BadType(BadType::FIFO)
} else if f.is_block_device() {
Self::BadType(BadType::BlockDevice)
} else if f.is_char_device() {
Self::BadType(BadType::CharacterDevice)
} else if f.is_socket() {
Self::BadType(BadType::Socket)
} else {
Self::BadType(BadType::Unknown)
}
}
}
struct DirEntry<'a> {
/// Path as stored in the dirstate, or just the filename for optimization.
hg_path: HgPathCow<'a>,
/// Filesystem path
fs_path: Cow<'a, Path>,
/// Lazily computed
symlink_metadata: Option<std::fs::Metadata>,
/// Already computed for ergonomics.
file_type: FakeFileType,
}
impl<'a> DirEntry<'a> {
/// Returns **unsorted** entries in the given directory, with name,
/// metadata and file type.
///
/// If a `.hg` sub-directory is encountered:
///
/// * At the repository root, ignore that sub-directory
/// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
/// list instead.
fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
// `read_dir` returns a "not found" error for the empty path
let at_cwd = path == Path::new("");
let read_dir_path = if at_cwd { Path::new(".") } else { path };
let mut results = Vec::new();
for entry in read_dir_path.read_dir()? {
let entry = entry?;
let file_type = match entry.file_type() {
Ok(v) => v,
Err(e) => {
// race with file deletion?
if e.kind() == std::io::ErrorKind::NotFound {
continue;
} else {
return Err(e);
}
}
};
let file_name = entry.file_name();
// FIXME don't do this when cached
if file_name == ".hg" {
if is_at_repo_root {
// Skip the repo’s own .hg (might be a symlink)
continue;
} else if file_type.is_dir() {
// A .hg sub-directory at another location means a subrepo,
// skip it entirely.
return Ok(Vec::new());
}
}
let full_path = if at_cwd {
file_name.clone().into()
} else {
entry.path()
};
let filename =
Cow::Owned(get_bytes_from_os_string(file_name).into());
let file_type = FakeFileType::from(file_type);
results.push(DirEntry {
hg_path: filename,
fs_path: Cow::Owned(full_path.to_path_buf()),
symlink_metadata: None,
file_type,
})
}
Ok(results)
}
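/// Returns the cached metadata if it was pre-populated, otherwise
/// queries the filesystem (the result is not cached back, since this
/// takes `&self`).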
fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
match &self.symlink_metadata {
Some(meta) => Ok(meta.clone()),
None => std::fs::symlink_metadata(&self.fs_path),
}
}
fn is_dir(&self) -> bool {
self.file_type == FakeFileType::Directory
}
fn is_file(&self) -> bool {
self.file_type == FakeFileType::File
}
fn is_symlink(&self) -> bool {
self.file_type == FakeFileType::Symlink
}
fn is_bad(&self) -> Option<BadType> {
match self.file_type {
FakeFileType::BadType(ty) => Some(ty),
_ => None,
}
}
}