##// END OF EJS Templates
rust-status: fix thread count ceiling...
Raphaël Gomès -
r51052:c5243582 stable
parent child Browse files
Show More
@@ -1,931 +1,931 b''
1 use crate::dirstate::entry::TruncatedTimestamp;
1 use crate::dirstate::entry::TruncatedTimestamp;
2 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate::status::IgnoreFnType;
3 use crate::dirstate::status::StatusPath;
3 use crate::dirstate::status::StatusPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 use crate::matchers::get_ignore_function;
10 use crate::matchers::get_ignore_function;
11 use crate::matchers::Matcher;
11 use crate::matchers::Matcher;
12 use crate::utils::files::get_bytes_from_os_string;
12 use crate::utils::files::get_bytes_from_os_string;
13 use crate::utils::files::get_bytes_from_path;
13 use crate::utils::files::get_bytes_from_path;
14 use crate::utils::files::get_path_from_bytes;
14 use crate::utils::files::get_path_from_bytes;
15 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
16 use crate::BadMatch;
16 use crate::BadMatch;
17 use crate::DirstateStatus;
17 use crate::DirstateStatus;
18 use crate::HgPathBuf;
18 use crate::HgPathBuf;
19 use crate::HgPathCow;
19 use crate::HgPathCow;
20 use crate::PatternFileWarning;
20 use crate::PatternFileWarning;
21 use crate::StatusError;
21 use crate::StatusError;
22 use crate::StatusOptions;
22 use crate::StatusOptions;
23 use micro_timer::timed;
23 use micro_timer::timed;
24 use once_cell::sync::OnceCell;
24 use once_cell::sync::OnceCell;
25 use rayon::prelude::*;
25 use rayon::prelude::*;
26 use sha1::{Digest, Sha1};
26 use sha1::{Digest, Sha1};
27 use std::borrow::Cow;
27 use std::borrow::Cow;
28 use std::io;
28 use std::io;
29 use std::path::Path;
29 use std::path::Path;
30 use std::path::PathBuf;
30 use std::path::PathBuf;
31 use std::sync::Mutex;
31 use std::sync::Mutex;
32 use std::time::SystemTime;
32 use std::time::SystemTime;
33
33
34 /// Returns the status of the working directory compared to its parent
34 /// Returns the status of the working directory compared to its parent
35 /// changeset.
35 /// changeset.
36 ///
36 ///
37 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 /// This algorithm is based on traversing the filesystem tree (`fs` in function
38 /// and variable names) and dirstate tree at the same time. The core of this
38 /// and variable names) and dirstate tree at the same time. The core of this
39 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
40 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 /// and its use of `itertools::merge_join_by`. When reaching a path that only
41 /// exists in one of the two trees, depending on information requested by
41 /// exists in one of the two trees, depending on information requested by
42 /// `options` we may need to traverse the remaining subtree.
42 /// `options` we may need to traverse the remaining subtree.
43 #[timed]
43 #[timed]
44 pub fn status<'dirstate>(
44 pub fn status<'dirstate>(
45 dmap: &'dirstate mut DirstateMap,
45 dmap: &'dirstate mut DirstateMap,
46 matcher: &(dyn Matcher + Sync),
46 matcher: &(dyn Matcher + Sync),
47 root_dir: PathBuf,
47 root_dir: PathBuf,
48 ignore_files: Vec<PathBuf>,
48 ignore_files: Vec<PathBuf>,
49 options: StatusOptions,
49 options: StatusOptions,
50 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
50 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
51 {
51 {
52 // Force the global rayon threadpool to not exceed 16 concurrent threads.
52 // Force the global rayon threadpool to not exceed 16 concurrent threads.
53 // This is a stop-gap measure until we figure out why using more than 16
53 // This is a stop-gap measure until we figure out why using more than 16
54 // threads makes `status` slower for each additional thread.
54 // threads makes `status` slower for each additional thread.
55 // We use `ok()` in case the global threadpool has already been
55 // We use `ok()` in case the global threadpool has already been
56 // instantiated in `rhg` or some other caller.
56 // instantiated in `rhg` or some other caller.
57 // TODO find the underlying cause and fix it, then remove this.
57 // TODO find the underlying cause and fix it, then remove this.
58 rayon::ThreadPoolBuilder::new()
58 rayon::ThreadPoolBuilder::new()
59 .num_threads(16)
59 .num_threads(16.min(rayon::current_num_threads()))
60 .build_global()
60 .build_global()
61 .ok();
61 .ok();
62
62
63 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
63 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
64 if options.list_ignored || options.list_unknown {
64 if options.list_ignored || options.list_unknown {
65 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
65 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
66 DirstateVersion::V1 => {
66 DirstateVersion::V1 => {
67 let (ignore_fn, warnings) = get_ignore_function(
67 let (ignore_fn, warnings) = get_ignore_function(
68 ignore_files,
68 ignore_files,
69 &root_dir,
69 &root_dir,
70 &mut |_source, _pattern_bytes| {},
70 &mut |_source, _pattern_bytes| {},
71 )?;
71 )?;
72 (ignore_fn, warnings, None)
72 (ignore_fn, warnings, None)
73 }
73 }
74 DirstateVersion::V2 => {
74 DirstateVersion::V2 => {
75 let mut hasher = Sha1::new();
75 let mut hasher = Sha1::new();
76 let (ignore_fn, warnings) = get_ignore_function(
76 let (ignore_fn, warnings) = get_ignore_function(
77 ignore_files,
77 ignore_files,
78 &root_dir,
78 &root_dir,
79 &mut |source, pattern_bytes| {
79 &mut |source, pattern_bytes| {
80 // If inside the repo, use the relative version to
80 // If inside the repo, use the relative version to
81 // make it deterministic inside tests.
81 // make it deterministic inside tests.
82 // The performance hit should be negligible.
82 // The performance hit should be negligible.
83 let source = source
83 let source = source
84 .strip_prefix(&root_dir)
84 .strip_prefix(&root_dir)
85 .unwrap_or(source);
85 .unwrap_or(source);
86 let source = get_bytes_from_path(source);
86 let source = get_bytes_from_path(source);
87
87
88 let mut subhasher = Sha1::new();
88 let mut subhasher = Sha1::new();
89 subhasher.update(pattern_bytes);
89 subhasher.update(pattern_bytes);
90 let patterns_hash = subhasher.finalize();
90 let patterns_hash = subhasher.finalize();
91
91
92 hasher.update(source);
92 hasher.update(source);
93 hasher.update(b" ");
93 hasher.update(b" ");
94 hasher.update(patterns_hash);
94 hasher.update(patterns_hash);
95 hasher.update(b"\n");
95 hasher.update(b"\n");
96 },
96 },
97 )?;
97 )?;
98 let new_hash = *hasher.finalize().as_ref();
98 let new_hash = *hasher.finalize().as_ref();
99 let changed = new_hash != dmap.ignore_patterns_hash;
99 let changed = new_hash != dmap.ignore_patterns_hash;
100 dmap.ignore_patterns_hash = new_hash;
100 dmap.ignore_patterns_hash = new_hash;
101 (ignore_fn, warnings, Some(changed))
101 (ignore_fn, warnings, Some(changed))
102 }
102 }
103 };
103 };
104 (ignore_fn, warnings, changed)
104 (ignore_fn, warnings, changed)
105 } else {
105 } else {
106 (Box::new(|&_| true), vec![], None)
106 (Box::new(|&_| true), vec![], None)
107 };
107 };
108
108
109 let filesystem_time_at_status_start =
109 let filesystem_time_at_status_start =
110 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
110 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
111
111
112 // If the repository is under the current directory, prefer using a
112 // If the repository is under the current directory, prefer using a
113 // relative path, so the kernel needs to traverse fewer directory in every
113 // relative path, so the kernel needs to traverse fewer directory in every
114 // call to `read_dir` or `symlink_metadata`.
114 // call to `read_dir` or `symlink_metadata`.
115 // This is effective in the common case where the current directory is the
115 // This is effective in the common case where the current directory is the
116 // repository root.
116 // repository root.
117
117
118 // TODO: Better yet would be to use libc functions like `openat` and
118 // TODO: Better yet would be to use libc functions like `openat` and
119 // `fstatat` to remove such repeated traversals entirely, but the standard
119 // `fstatat` to remove such repeated traversals entirely, but the standard
120 // library does not provide APIs based on those.
120 // library does not provide APIs based on those.
121 // Maybe with a crate like https://crates.io/crates/openat instead?
121 // Maybe with a crate like https://crates.io/crates/openat instead?
122 let root_dir = if let Some(relative) = std::env::current_dir()
122 let root_dir = if let Some(relative) = std::env::current_dir()
123 .ok()
123 .ok()
124 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
124 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
125 {
125 {
126 relative
126 relative
127 } else {
127 } else {
128 &root_dir
128 &root_dir
129 };
129 };
130
130
131 let outcome = DirstateStatus {
131 let outcome = DirstateStatus {
132 filesystem_time_at_status_start,
132 filesystem_time_at_status_start,
133 ..Default::default()
133 ..Default::default()
134 };
134 };
135 let common = StatusCommon {
135 let common = StatusCommon {
136 dmap,
136 dmap,
137 options,
137 options,
138 matcher,
138 matcher,
139 ignore_fn,
139 ignore_fn,
140 outcome: Mutex::new(outcome),
140 outcome: Mutex::new(outcome),
141 ignore_patterns_have_changed: patterns_changed,
141 ignore_patterns_have_changed: patterns_changed,
142 new_cacheable_directories: Default::default(),
142 new_cacheable_directories: Default::default(),
143 outdated_cached_directories: Default::default(),
143 outdated_cached_directories: Default::default(),
144 filesystem_time_at_status_start,
144 filesystem_time_at_status_start,
145 };
145 };
146 let is_at_repo_root = true;
146 let is_at_repo_root = true;
147 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
147 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
148 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
148 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
149 let root_cached_mtime = None;
149 let root_cached_mtime = None;
150 let root_dir_metadata = None;
150 let root_dir_metadata = None;
151 // If the path we have for the repository root is a symlink, do follow it.
151 // If the path we have for the repository root is a symlink, do follow it.
152 // (As opposed to symlinks within the working directory which are not
152 // (As opposed to symlinks within the working directory which are not
153 // followed, using `std::fs::symlink_metadata`.)
153 // followed, using `std::fs::symlink_metadata`.)
154 common.traverse_fs_directory_and_dirstate(
154 common.traverse_fs_directory_and_dirstate(
155 &has_ignored_ancestor,
155 &has_ignored_ancestor,
156 dmap.root.as_ref(),
156 dmap.root.as_ref(),
157 hg_path,
157 hg_path,
158 &root_dir,
158 &root_dir,
159 root_dir_metadata,
159 root_dir_metadata,
160 root_cached_mtime,
160 root_cached_mtime,
161 is_at_repo_root,
161 is_at_repo_root,
162 )?;
162 )?;
163 let mut outcome = common.outcome.into_inner().unwrap();
163 let mut outcome = common.outcome.into_inner().unwrap();
164 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
164 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
165 let outdated = common.outdated_cached_directories.into_inner().unwrap();
165 let outdated = common.outdated_cached_directories.into_inner().unwrap();
166
166
167 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
167 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
168 || !outdated.is_empty()
168 || !outdated.is_empty()
169 || (!new_cacheable.is_empty()
169 || (!new_cacheable.is_empty()
170 && dmap.dirstate_version == DirstateVersion::V2);
170 && dmap.dirstate_version == DirstateVersion::V2);
171
171
172 // Remove outdated mtimes before adding new mtimes, in case a given
172 // Remove outdated mtimes before adding new mtimes, in case a given
173 // directory is both
173 // directory is both
174 for path in &outdated {
174 for path in &outdated {
175 dmap.clear_cached_mtime(path)?;
175 dmap.clear_cached_mtime(path)?;
176 }
176 }
177 for (path, mtime) in &new_cacheable {
177 for (path, mtime) in &new_cacheable {
178 dmap.set_cached_mtime(path, *mtime)?;
178 dmap.set_cached_mtime(path, *mtime)?;
179 }
179 }
180
180
181 Ok((outcome, warnings))
181 Ok((outcome, warnings))
182 }
182 }
183
183
184 /// Bag of random things needed by various parts of the algorithm. Reduces the
184 /// Bag of random things needed by various parts of the algorithm. Reduces the
185 /// number of parameters passed to functions.
185 /// number of parameters passed to functions.
186 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
186 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
187 dmap: &'tree DirstateMap<'on_disk>,
187 dmap: &'tree DirstateMap<'on_disk>,
188 options: StatusOptions,
188 options: StatusOptions,
189 matcher: &'a (dyn Matcher + Sync),
189 matcher: &'a (dyn Matcher + Sync),
190 ignore_fn: IgnoreFnType<'a>,
190 ignore_fn: IgnoreFnType<'a>,
191 outcome: Mutex<DirstateStatus<'on_disk>>,
191 outcome: Mutex<DirstateStatus<'on_disk>>,
192 /// New timestamps of directories to be used for caching their readdirs
192 /// New timestamps of directories to be used for caching their readdirs
193 new_cacheable_directories:
193 new_cacheable_directories:
194 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
194 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
195 /// Used to invalidate the readdir cache of directories
195 /// Used to invalidate the readdir cache of directories
196 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
196 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
197
197
198 /// Whether ignore files like `.hgignore` have changed since the previous
198 /// Whether ignore files like `.hgignore` have changed since the previous
199 /// time a `status()` call wrote their hash to the dirstate. `None` means
199 /// time a `status()` call wrote their hash to the dirstate. `None` means
200 /// we don’t know as this run doesn’t list either ignored or uknown files
200 /// we don’t know as this run doesn’t list either ignored or uknown files
201 /// and therefore isn’t reading `.hgignore`.
201 /// and therefore isn’t reading `.hgignore`.
202 ignore_patterns_have_changed: Option<bool>,
202 ignore_patterns_have_changed: Option<bool>,
203
203
204 /// The current time at the start of the `status()` algorithm, as measured
204 /// The current time at the start of the `status()` algorithm, as measured
205 /// and possibly truncated by the filesystem.
205 /// and possibly truncated by the filesystem.
206 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
206 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
207 }
207 }
208
208
/// Category a path is reported under. Each variant selects the list of the
/// same name in `DirstateStatus` (see `push_outcome_common`).
enum Outcome {
    Modified,
    Added,
    Removed,
    Deleted,
    Clean,
    Ignored,
    Unknown,
    Unsure,
}
219
219
220 /// Lazy computation of whether a given path has a hgignored
220 /// Lazy computation of whether a given path has a hgignored
221 /// ancestor.
221 /// ancestor.
222 struct HasIgnoredAncestor<'a> {
222 struct HasIgnoredAncestor<'a> {
223 /// `path` and `parent` constitute the inputs to the computation,
223 /// `path` and `parent` constitute the inputs to the computation,
224 /// `cache` stores the outcome.
224 /// `cache` stores the outcome.
225 path: &'a HgPath,
225 path: &'a HgPath,
226 parent: Option<&'a HasIgnoredAncestor<'a>>,
226 parent: Option<&'a HasIgnoredAncestor<'a>>,
227 cache: OnceCell<bool>,
227 cache: OnceCell<bool>,
228 }
228 }
229
229
230 impl<'a> HasIgnoredAncestor<'a> {
230 impl<'a> HasIgnoredAncestor<'a> {
231 fn create(
231 fn create(
232 parent: Option<&'a HasIgnoredAncestor<'a>>,
232 parent: Option<&'a HasIgnoredAncestor<'a>>,
233 path: &'a HgPath,
233 path: &'a HgPath,
234 ) -> HasIgnoredAncestor<'a> {
234 ) -> HasIgnoredAncestor<'a> {
235 Self {
235 Self {
236 path,
236 path,
237 parent,
237 parent,
238 cache: OnceCell::new(),
238 cache: OnceCell::new(),
239 }
239 }
240 }
240 }
241
241
242 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
242 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
243 match self.parent {
243 match self.parent {
244 None => false,
244 None => false,
245 Some(parent) => {
245 Some(parent) => {
246 *(parent.cache.get_or_init(|| {
246 *(parent.cache.get_or_init(|| {
247 parent.force(ignore_fn) || ignore_fn(&self.path)
247 parent.force(ignore_fn) || ignore_fn(&self.path)
248 }))
248 }))
249 }
249 }
250 }
250 }
251 }
251 }
252 }
252 }
253
253
254 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
254 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
255 fn push_outcome(
255 fn push_outcome(
256 &self,
256 &self,
257 which: Outcome,
257 which: Outcome,
258 dirstate_node: &NodeRef<'tree, 'on_disk>,
258 dirstate_node: &NodeRef<'tree, 'on_disk>,
259 ) -> Result<(), DirstateV2ParseError> {
259 ) -> Result<(), DirstateV2ParseError> {
260 let path = dirstate_node
260 let path = dirstate_node
261 .full_path_borrowed(self.dmap.on_disk)?
261 .full_path_borrowed(self.dmap.on_disk)?
262 .detach_from_tree();
262 .detach_from_tree();
263 let copy_source = if self.options.list_copies {
263 let copy_source = if self.options.list_copies {
264 dirstate_node
264 dirstate_node
265 .copy_source_borrowed(self.dmap.on_disk)?
265 .copy_source_borrowed(self.dmap.on_disk)?
266 .map(|source| source.detach_from_tree())
266 .map(|source| source.detach_from_tree())
267 } else {
267 } else {
268 None
268 None
269 };
269 };
270 self.push_outcome_common(which, path, copy_source);
270 self.push_outcome_common(which, path, copy_source);
271 Ok(())
271 Ok(())
272 }
272 }
273
273
274 fn push_outcome_without_copy_source(
274 fn push_outcome_without_copy_source(
275 &self,
275 &self,
276 which: Outcome,
276 which: Outcome,
277 path: &BorrowedPath<'_, 'on_disk>,
277 path: &BorrowedPath<'_, 'on_disk>,
278 ) {
278 ) {
279 self.push_outcome_common(which, path.detach_from_tree(), None)
279 self.push_outcome_common(which, path.detach_from_tree(), None)
280 }
280 }
281
281
282 fn push_outcome_common(
282 fn push_outcome_common(
283 &self,
283 &self,
284 which: Outcome,
284 which: Outcome,
285 path: HgPathCow<'on_disk>,
285 path: HgPathCow<'on_disk>,
286 copy_source: Option<HgPathCow<'on_disk>>,
286 copy_source: Option<HgPathCow<'on_disk>>,
287 ) {
287 ) {
288 let mut outcome = self.outcome.lock().unwrap();
288 let mut outcome = self.outcome.lock().unwrap();
289 let vec = match which {
289 let vec = match which {
290 Outcome::Modified => &mut outcome.modified,
290 Outcome::Modified => &mut outcome.modified,
291 Outcome::Added => &mut outcome.added,
291 Outcome::Added => &mut outcome.added,
292 Outcome::Removed => &mut outcome.removed,
292 Outcome::Removed => &mut outcome.removed,
293 Outcome::Deleted => &mut outcome.deleted,
293 Outcome::Deleted => &mut outcome.deleted,
294 Outcome::Clean => &mut outcome.clean,
294 Outcome::Clean => &mut outcome.clean,
295 Outcome::Ignored => &mut outcome.ignored,
295 Outcome::Ignored => &mut outcome.ignored,
296 Outcome::Unknown => &mut outcome.unknown,
296 Outcome::Unknown => &mut outcome.unknown,
297 Outcome::Unsure => &mut outcome.unsure,
297 Outcome::Unsure => &mut outcome.unsure,
298 };
298 };
299 vec.push(StatusPath { path, copy_source });
299 vec.push(StatusPath { path, copy_source });
300 }
300 }
301
301
302 fn read_dir(
302 fn read_dir(
303 &self,
303 &self,
304 hg_path: &HgPath,
304 hg_path: &HgPath,
305 fs_path: &Path,
305 fs_path: &Path,
306 is_at_repo_root: bool,
306 is_at_repo_root: bool,
307 ) -> Result<Vec<DirEntry>, ()> {
307 ) -> Result<Vec<DirEntry>, ()> {
308 DirEntry::read_dir(fs_path, is_at_repo_root)
308 DirEntry::read_dir(fs_path, is_at_repo_root)
309 .map_err(|error| self.io_error(error, hg_path))
309 .map_err(|error| self.io_error(error, hg_path))
310 }
310 }
311
311
312 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
312 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
313 let errno = error.raw_os_error().expect("expected real OS error");
313 let errno = error.raw_os_error().expect("expected real OS error");
314 self.outcome
314 self.outcome
315 .lock()
315 .lock()
316 .unwrap()
316 .unwrap()
317 .bad
317 .bad
318 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
318 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
319 }
319 }
320
320
321 fn check_for_outdated_directory_cache(
321 fn check_for_outdated_directory_cache(
322 &self,
322 &self,
323 dirstate_node: &NodeRef<'tree, 'on_disk>,
323 dirstate_node: &NodeRef<'tree, 'on_disk>,
324 ) -> Result<bool, DirstateV2ParseError> {
324 ) -> Result<bool, DirstateV2ParseError> {
325 if self.ignore_patterns_have_changed == Some(true)
325 if self.ignore_patterns_have_changed == Some(true)
326 && dirstate_node.cached_directory_mtime()?.is_some()
326 && dirstate_node.cached_directory_mtime()?.is_some()
327 {
327 {
328 self.outdated_cached_directories.lock().unwrap().push(
328 self.outdated_cached_directories.lock().unwrap().push(
329 dirstate_node
329 dirstate_node
330 .full_path_borrowed(self.dmap.on_disk)?
330 .full_path_borrowed(self.dmap.on_disk)?
331 .detach_from_tree(),
331 .detach_from_tree(),
332 );
332 );
333 return Ok(true);
333 return Ok(true);
334 }
334 }
335 Ok(false)
335 Ok(false)
336 }
336 }
337
337
338 /// If this returns true, we can get accurate results by only using
338 /// If this returns true, we can get accurate results by only using
339 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
339 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
340 /// need to call `read_dir`.
340 /// need to call `read_dir`.
341 fn can_skip_fs_readdir(
341 fn can_skip_fs_readdir(
342 &self,
342 &self,
343 directory_metadata: Option<&std::fs::Metadata>,
343 directory_metadata: Option<&std::fs::Metadata>,
344 cached_directory_mtime: Option<TruncatedTimestamp>,
344 cached_directory_mtime: Option<TruncatedTimestamp>,
345 ) -> bool {
345 ) -> bool {
346 if !self.options.list_unknown && !self.options.list_ignored {
346 if !self.options.list_unknown && !self.options.list_ignored {
347 // All states that we care about listing have corresponding
347 // All states that we care about listing have corresponding
348 // dirstate entries.
348 // dirstate entries.
349 // This happens for example with `hg status -mard`.
349 // This happens for example with `hg status -mard`.
350 return true;
350 return true;
351 }
351 }
352 if !self.options.list_ignored
352 if !self.options.list_ignored
353 && self.ignore_patterns_have_changed == Some(false)
353 && self.ignore_patterns_have_changed == Some(false)
354 {
354 {
355 if let Some(cached_mtime) = cached_directory_mtime {
355 if let Some(cached_mtime) = cached_directory_mtime {
356 // The dirstate contains a cached mtime for this directory, set
356 // The dirstate contains a cached mtime for this directory, set
357 // by a previous run of the `status` algorithm which found this
357 // by a previous run of the `status` algorithm which found this
358 // directory eligible for `read_dir` caching.
358 // directory eligible for `read_dir` caching.
359 if let Some(meta) = directory_metadata {
359 if let Some(meta) = directory_metadata {
360 if cached_mtime
360 if cached_mtime
361 .likely_equal_to_mtime_of(meta)
361 .likely_equal_to_mtime_of(meta)
362 .unwrap_or(false)
362 .unwrap_or(false)
363 {
363 {
364 // The mtime of that directory has not changed
364 // The mtime of that directory has not changed
365 // since then, which means that the results of
365 // since then, which means that the results of
366 // `read_dir` should also be unchanged.
366 // `read_dir` should also be unchanged.
367 return true;
367 return true;
368 }
368 }
369 }
369 }
370 }
370 }
371 }
371 }
372 false
372 false
373 }
373 }
374
374
375 /// Returns whether all child entries of the filesystem directory have a
375 /// Returns whether all child entries of the filesystem directory have a
376 /// corresponding dirstate node or are ignored.
376 /// corresponding dirstate node or are ignored.
377 fn traverse_fs_directory_and_dirstate<'ancestor>(
377 fn traverse_fs_directory_and_dirstate<'ancestor>(
378 &self,
378 &self,
379 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
379 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
380 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
380 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
381 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
381 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
382 directory_fs_path: &Path,
382 directory_fs_path: &Path,
383 directory_metadata: Option<&std::fs::Metadata>,
383 directory_metadata: Option<&std::fs::Metadata>,
384 cached_directory_mtime: Option<TruncatedTimestamp>,
384 cached_directory_mtime: Option<TruncatedTimestamp>,
385 is_at_repo_root: bool,
385 is_at_repo_root: bool,
386 ) -> Result<bool, DirstateV2ParseError> {
386 ) -> Result<bool, DirstateV2ParseError> {
387 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
387 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
388 {
388 {
389 dirstate_nodes
389 dirstate_nodes
390 .par_iter()
390 .par_iter()
391 .map(|dirstate_node| {
391 .map(|dirstate_node| {
392 let fs_path = directory_fs_path.join(get_path_from_bytes(
392 let fs_path = directory_fs_path.join(get_path_from_bytes(
393 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
393 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
394 ));
394 ));
395 match std::fs::symlink_metadata(&fs_path) {
395 match std::fs::symlink_metadata(&fs_path) {
396 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
396 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
397 &fs_path,
397 &fs_path,
398 &fs_metadata,
398 &fs_metadata,
399 dirstate_node,
399 dirstate_node,
400 has_ignored_ancestor,
400 has_ignored_ancestor,
401 ),
401 ),
402 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
402 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
403 self.traverse_dirstate_only(dirstate_node)
403 self.traverse_dirstate_only(dirstate_node)
404 }
404 }
405 Err(error) => {
405 Err(error) => {
406 let hg_path =
406 let hg_path =
407 dirstate_node.full_path(self.dmap.on_disk)?;
407 dirstate_node.full_path(self.dmap.on_disk)?;
408 Ok(self.io_error(error, hg_path))
408 Ok(self.io_error(error, hg_path))
409 }
409 }
410 }
410 }
411 })
411 })
412 .collect::<Result<_, _>>()?;
412 .collect::<Result<_, _>>()?;
413
413
414 // We don’t know, so conservatively say this isn’t the case
414 // We don’t know, so conservatively say this isn’t the case
415 let children_all_have_dirstate_node_or_are_ignored = false;
415 let children_all_have_dirstate_node_or_are_ignored = false;
416
416
417 return Ok(children_all_have_dirstate_node_or_are_ignored);
417 return Ok(children_all_have_dirstate_node_or_are_ignored);
418 }
418 }
419
419
420 let mut fs_entries = if let Ok(entries) = self.read_dir(
420 let mut fs_entries = if let Ok(entries) = self.read_dir(
421 directory_hg_path,
421 directory_hg_path,
422 directory_fs_path,
422 directory_fs_path,
423 is_at_repo_root,
423 is_at_repo_root,
424 ) {
424 ) {
425 entries
425 entries
426 } else {
426 } else {
427 // Treat an unreadable directory (typically because of insufficient
427 // Treat an unreadable directory (typically because of insufficient
428 // permissions) like an empty directory. `self.read_dir` has
428 // permissions) like an empty directory. `self.read_dir` has
429 // already called `self.io_error` so a warning will be emitted.
429 // already called `self.io_error` so a warning will be emitted.
430 Vec::new()
430 Vec::new()
431 };
431 };
432
432
433 // `merge_join_by` requires both its input iterators to be sorted:
433 // `merge_join_by` requires both its input iterators to be sorted:
434
434
435 let dirstate_nodes = dirstate_nodes.sorted();
435 let dirstate_nodes = dirstate_nodes.sorted();
436 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
436 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
437 // https://github.com/rust-lang/rust/issues/34162
437 // https://github.com/rust-lang/rust/issues/34162
438 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
438 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
439
439
440 // Propagate here any error that would happen inside the comparison
440 // Propagate here any error that would happen inside the comparison
441 // callback below
441 // callback below
442 for dirstate_node in &dirstate_nodes {
442 for dirstate_node in &dirstate_nodes {
443 dirstate_node.base_name(self.dmap.on_disk)?;
443 dirstate_node.base_name(self.dmap.on_disk)?;
444 }
444 }
445 itertools::merge_join_by(
445 itertools::merge_join_by(
446 dirstate_nodes,
446 dirstate_nodes,
447 &fs_entries,
447 &fs_entries,
448 |dirstate_node, fs_entry| {
448 |dirstate_node, fs_entry| {
449 // This `unwrap` never panics because we already propagated
449 // This `unwrap` never panics because we already propagated
450 // those errors above
450 // those errors above
451 dirstate_node
451 dirstate_node
452 .base_name(self.dmap.on_disk)
452 .base_name(self.dmap.on_disk)
453 .unwrap()
453 .unwrap()
454 .cmp(&fs_entry.base_name)
454 .cmp(&fs_entry.base_name)
455 },
455 },
456 )
456 )
457 .par_bridge()
457 .par_bridge()
458 .map(|pair| {
458 .map(|pair| {
459 use itertools::EitherOrBoth::*;
459 use itertools::EitherOrBoth::*;
460 let has_dirstate_node_or_is_ignored;
460 let has_dirstate_node_or_is_ignored;
461 match pair {
461 match pair {
462 Both(dirstate_node, fs_entry) => {
462 Both(dirstate_node, fs_entry) => {
463 self.traverse_fs_and_dirstate(
463 self.traverse_fs_and_dirstate(
464 &fs_entry.full_path,
464 &fs_entry.full_path,
465 &fs_entry.metadata,
465 &fs_entry.metadata,
466 dirstate_node,
466 dirstate_node,
467 has_ignored_ancestor,
467 has_ignored_ancestor,
468 )?;
468 )?;
469 has_dirstate_node_or_is_ignored = true
469 has_dirstate_node_or_is_ignored = true
470 }
470 }
471 Left(dirstate_node) => {
471 Left(dirstate_node) => {
472 self.traverse_dirstate_only(dirstate_node)?;
472 self.traverse_dirstate_only(dirstate_node)?;
473 has_dirstate_node_or_is_ignored = true;
473 has_dirstate_node_or_is_ignored = true;
474 }
474 }
475 Right(fs_entry) => {
475 Right(fs_entry) => {
476 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
476 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
477 has_ignored_ancestor.force(&self.ignore_fn),
477 has_ignored_ancestor.force(&self.ignore_fn),
478 directory_hg_path,
478 directory_hg_path,
479 fs_entry,
479 fs_entry,
480 )
480 )
481 }
481 }
482 }
482 }
483 Ok(has_dirstate_node_or_is_ignored)
483 Ok(has_dirstate_node_or_is_ignored)
484 })
484 })
485 .try_reduce(|| true, |a, b| Ok(a && b))
485 .try_reduce(|| true, |a, b| Ok(a && b))
486 }
486 }
487
487
488 fn traverse_fs_and_dirstate<'ancestor>(
488 fn traverse_fs_and_dirstate<'ancestor>(
489 &self,
489 &self,
490 fs_path: &Path,
490 fs_path: &Path,
491 fs_metadata: &std::fs::Metadata,
491 fs_metadata: &std::fs::Metadata,
492 dirstate_node: NodeRef<'tree, 'on_disk>,
492 dirstate_node: NodeRef<'tree, 'on_disk>,
493 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
493 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
494 ) -> Result<(), DirstateV2ParseError> {
494 ) -> Result<(), DirstateV2ParseError> {
495 let outdated_dircache =
495 let outdated_dircache =
496 self.check_for_outdated_directory_cache(&dirstate_node)?;
496 self.check_for_outdated_directory_cache(&dirstate_node)?;
497 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
497 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
498 let file_type = fs_metadata.file_type();
498 let file_type = fs_metadata.file_type();
499 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
499 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
500 if !file_or_symlink {
500 if !file_or_symlink {
501 // If we previously had a file here, it was removed (with
501 // If we previously had a file here, it was removed (with
502 // `hg rm` or similar) or deleted before it could be
502 // `hg rm` or similar) or deleted before it could be
503 // replaced by a directory or something else.
503 // replaced by a directory or something else.
504 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
504 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
505 }
505 }
506 if file_type.is_dir() {
506 if file_type.is_dir() {
507 if self.options.collect_traversed_dirs {
507 if self.options.collect_traversed_dirs {
508 self.outcome
508 self.outcome
509 .lock()
509 .lock()
510 .unwrap()
510 .unwrap()
511 .traversed
511 .traversed
512 .push(hg_path.detach_from_tree())
512 .push(hg_path.detach_from_tree())
513 }
513 }
514 let is_ignored = HasIgnoredAncestor::create(
514 let is_ignored = HasIgnoredAncestor::create(
515 Some(&has_ignored_ancestor),
515 Some(&has_ignored_ancestor),
516 hg_path,
516 hg_path,
517 );
517 );
518 let is_at_repo_root = false;
518 let is_at_repo_root = false;
519 let children_all_have_dirstate_node_or_are_ignored = self
519 let children_all_have_dirstate_node_or_are_ignored = self
520 .traverse_fs_directory_and_dirstate(
520 .traverse_fs_directory_and_dirstate(
521 &is_ignored,
521 &is_ignored,
522 dirstate_node.children(self.dmap.on_disk)?,
522 dirstate_node.children(self.dmap.on_disk)?,
523 hg_path,
523 hg_path,
524 fs_path,
524 fs_path,
525 Some(fs_metadata),
525 Some(fs_metadata),
526 dirstate_node.cached_directory_mtime()?,
526 dirstate_node.cached_directory_mtime()?,
527 is_at_repo_root,
527 is_at_repo_root,
528 )?;
528 )?;
529 self.maybe_save_directory_mtime(
529 self.maybe_save_directory_mtime(
530 children_all_have_dirstate_node_or_are_ignored,
530 children_all_have_dirstate_node_or_are_ignored,
531 fs_metadata,
531 fs_metadata,
532 dirstate_node,
532 dirstate_node,
533 outdated_dircache,
533 outdated_dircache,
534 )?
534 )?
535 } else {
535 } else {
536 if file_or_symlink && self.matcher.matches(&hg_path) {
536 if file_or_symlink && self.matcher.matches(&hg_path) {
537 if let Some(entry) = dirstate_node.entry()? {
537 if let Some(entry) = dirstate_node.entry()? {
538 if !entry.any_tracked() {
538 if !entry.any_tracked() {
539 // Forward-compat if we start tracking unknown/ignored
539 // Forward-compat if we start tracking unknown/ignored
540 // files for caching reasons
540 // files for caching reasons
541 self.mark_unknown_or_ignored(
541 self.mark_unknown_or_ignored(
542 has_ignored_ancestor.force(&self.ignore_fn),
542 has_ignored_ancestor.force(&self.ignore_fn),
543 &hg_path,
543 &hg_path,
544 );
544 );
545 }
545 }
546 if entry.added() {
546 if entry.added() {
547 self.push_outcome(Outcome::Added, &dirstate_node)?;
547 self.push_outcome(Outcome::Added, &dirstate_node)?;
548 } else if entry.removed() {
548 } else if entry.removed() {
549 self.push_outcome(Outcome::Removed, &dirstate_node)?;
549 self.push_outcome(Outcome::Removed, &dirstate_node)?;
550 } else if entry.modified() {
550 } else if entry.modified() {
551 self.push_outcome(Outcome::Modified, &dirstate_node)?;
551 self.push_outcome(Outcome::Modified, &dirstate_node)?;
552 } else {
552 } else {
553 self.handle_normal_file(&dirstate_node, fs_metadata)?;
553 self.handle_normal_file(&dirstate_node, fs_metadata)?;
554 }
554 }
555 } else {
555 } else {
556 // `node.entry.is_none()` indicates a "directory"
556 // `node.entry.is_none()` indicates a "directory"
557 // node, but the filesystem has a file
557 // node, but the filesystem has a file
558 self.mark_unknown_or_ignored(
558 self.mark_unknown_or_ignored(
559 has_ignored_ancestor.force(&self.ignore_fn),
559 has_ignored_ancestor.force(&self.ignore_fn),
560 hg_path,
560 hg_path,
561 );
561 );
562 }
562 }
563 }
563 }
564
564
565 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
565 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
566 {
566 {
567 self.traverse_dirstate_only(child_node)?
567 self.traverse_dirstate_only(child_node)?
568 }
568 }
569 }
569 }
570 Ok(())
570 Ok(())
571 }
571 }
572
572
573 /// Save directory mtime if applicable.
573 /// Save directory mtime if applicable.
574 ///
574 ///
575 /// `outdated_directory_cache` is `true` if we've just invalidated the
575 /// `outdated_directory_cache` is `true` if we've just invalidated the
576 /// cache for this directory in `check_for_outdated_directory_cache`,
576 /// cache for this directory in `check_for_outdated_directory_cache`,
577 /// which forces the update.
577 /// which forces the update.
578 fn maybe_save_directory_mtime(
578 fn maybe_save_directory_mtime(
579 &self,
579 &self,
580 children_all_have_dirstate_node_or_are_ignored: bool,
580 children_all_have_dirstate_node_or_are_ignored: bool,
581 directory_metadata: &std::fs::Metadata,
581 directory_metadata: &std::fs::Metadata,
582 dirstate_node: NodeRef<'tree, 'on_disk>,
582 dirstate_node: NodeRef<'tree, 'on_disk>,
583 outdated_directory_cache: bool,
583 outdated_directory_cache: bool,
584 ) -> Result<(), DirstateV2ParseError> {
584 ) -> Result<(), DirstateV2ParseError> {
585 if !children_all_have_dirstate_node_or_are_ignored {
585 if !children_all_have_dirstate_node_or_are_ignored {
586 return Ok(());
586 return Ok(());
587 }
587 }
588 // All filesystem directory entries from `read_dir` have a
588 // All filesystem directory entries from `read_dir` have a
589 // corresponding node in the dirstate, so we can reconstitute the
589 // corresponding node in the dirstate, so we can reconstitute the
590 // names of those entries without calling `read_dir` again.
590 // names of those entries without calling `read_dir` again.
591
591
592 // TODO: use let-else here and below when available:
592 // TODO: use let-else here and below when available:
593 // https://github.com/rust-lang/rust/issues/87335
593 // https://github.com/rust-lang/rust/issues/87335
594 let status_start = if let Some(status_start) =
594 let status_start = if let Some(status_start) =
595 &self.filesystem_time_at_status_start
595 &self.filesystem_time_at_status_start
596 {
596 {
597 status_start
597 status_start
598 } else {
598 } else {
599 return Ok(());
599 return Ok(());
600 };
600 };
601
601
602 // Although the Rust standard library’s `SystemTime` type
602 // Although the Rust standard library’s `SystemTime` type
603 // has nanosecond precision, the times reported for a
603 // has nanosecond precision, the times reported for a
604 // directory’s (or file’s) modified time may have lower
604 // directory’s (or file’s) modified time may have lower
605 // resolution based on the filesystem (for example ext3
605 // resolution based on the filesystem (for example ext3
606 // only stores integer seconds), kernel (see
606 // only stores integer seconds), kernel (see
607 // https://stackoverflow.com/a/14393315/1162888), etc.
607 // https://stackoverflow.com/a/14393315/1162888), etc.
608 let directory_mtime = if let Ok(option) =
608 let directory_mtime = if let Ok(option) =
609 TruncatedTimestamp::for_reliable_mtime_of(
609 TruncatedTimestamp::for_reliable_mtime_of(
610 directory_metadata,
610 directory_metadata,
611 status_start,
611 status_start,
612 ) {
612 ) {
613 if let Some(directory_mtime) = option {
613 if let Some(directory_mtime) = option {
614 directory_mtime
614 directory_mtime
615 } else {
615 } else {
616 // The directory was modified too recently,
616 // The directory was modified too recently,
617 // don’t cache its `read_dir` results.
617 // don’t cache its `read_dir` results.
618 //
618 //
619 // 1. A change to this directory (direct child was
619 // 1. A change to this directory (direct child was
620 // added or removed) cause its mtime to be set
620 // added or removed) cause its mtime to be set
621 // (possibly truncated) to `directory_mtime`
621 // (possibly truncated) to `directory_mtime`
622 // 2. This `status` algorithm calls `read_dir`
622 // 2. This `status` algorithm calls `read_dir`
623 // 3. An other change is made to the same directory is
623 // 3. An other change is made to the same directory is
624 // made so that calling `read_dir` agin would give
624 // made so that calling `read_dir` agin would give
625 // different results, but soon enough after 1. that
625 // different results, but soon enough after 1. that
626 // the mtime stays the same
626 // the mtime stays the same
627 //
627 //
628 // On a system where the time resolution poor, this
628 // On a system where the time resolution poor, this
629 // scenario is not unlikely if all three steps are caused
629 // scenario is not unlikely if all three steps are caused
630 // by the same script.
630 // by the same script.
631 return Ok(());
631 return Ok(());
632 }
632 }
633 } else {
633 } else {
634 // OS/libc does not support mtime?
634 // OS/libc does not support mtime?
635 return Ok(());
635 return Ok(());
636 };
636 };
637 // We’ve observed (through `status_start`) that time has
637 // We’ve observed (through `status_start`) that time has
638 // “progressed” since `directory_mtime`, so any further
638 // “progressed” since `directory_mtime`, so any further
639 // change to this directory is extremely likely to cause a
639 // change to this directory is extremely likely to cause a
640 // different mtime.
640 // different mtime.
641 //
641 //
642 // Having the same mtime again is not entirely impossible
642 // Having the same mtime again is not entirely impossible
643 // since the system clock is not monotonous. It could jump
643 // since the system clock is not monotonous. It could jump
644 // backward to some point before `directory_mtime`, then a
644 // backward to some point before `directory_mtime`, then a
645 // directory change could potentially happen during exactly
645 // directory change could potentially happen during exactly
646 // the wrong tick.
646 // the wrong tick.
647 //
647 //
648 // We deem this scenario (unlike the previous one) to be
648 // We deem this scenario (unlike the previous one) to be
649 // unlikely enough in practice.
649 // unlikely enough in practice.
650
650
651 let is_up_to_date = if let Some(cached) =
651 let is_up_to_date = if let Some(cached) =
652 dirstate_node.cached_directory_mtime()?
652 dirstate_node.cached_directory_mtime()?
653 {
653 {
654 !outdated_directory_cache && cached.likely_equal(directory_mtime)
654 !outdated_directory_cache && cached.likely_equal(directory_mtime)
655 } else {
655 } else {
656 false
656 false
657 };
657 };
658 if !is_up_to_date {
658 if !is_up_to_date {
659 let hg_path = dirstate_node
659 let hg_path = dirstate_node
660 .full_path_borrowed(self.dmap.on_disk)?
660 .full_path_borrowed(self.dmap.on_disk)?
661 .detach_from_tree();
661 .detach_from_tree();
662 self.new_cacheable_directories
662 self.new_cacheable_directories
663 .lock()
663 .lock()
664 .unwrap()
664 .unwrap()
665 .push((hg_path, directory_mtime))
665 .push((hg_path, directory_mtime))
666 }
666 }
667 Ok(())
667 Ok(())
668 }
668 }
669
669
670 /// A file that is clean in the dirstate was found in the filesystem
670 /// A file that is clean in the dirstate was found in the filesystem
671 fn handle_normal_file(
671 fn handle_normal_file(
672 &self,
672 &self,
673 dirstate_node: &NodeRef<'tree, 'on_disk>,
673 dirstate_node: &NodeRef<'tree, 'on_disk>,
674 fs_metadata: &std::fs::Metadata,
674 fs_metadata: &std::fs::Metadata,
675 ) -> Result<(), DirstateV2ParseError> {
675 ) -> Result<(), DirstateV2ParseError> {
676 // Keep the low 31 bits
676 // Keep the low 31 bits
677 fn truncate_u64(value: u64) -> i32 {
677 fn truncate_u64(value: u64) -> i32 {
678 (value & 0x7FFF_FFFF) as i32
678 (value & 0x7FFF_FFFF) as i32
679 }
679 }
680
680
681 let entry = dirstate_node
681 let entry = dirstate_node
682 .entry()?
682 .entry()?
683 .expect("handle_normal_file called with entry-less node");
683 .expect("handle_normal_file called with entry-less node");
684 let mode_changed =
684 let mode_changed =
685 || self.options.check_exec && entry.mode_changed(fs_metadata);
685 || self.options.check_exec && entry.mode_changed(fs_metadata);
686 let size = entry.size();
686 let size = entry.size();
687 let size_changed = size != truncate_u64(fs_metadata.len());
687 let size_changed = size != truncate_u64(fs_metadata.len());
688 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
688 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
689 // issue6456: Size returned may be longer due to encryption
689 // issue6456: Size returned may be longer due to encryption
690 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
690 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
691 self.push_outcome(Outcome::Unsure, dirstate_node)?
691 self.push_outcome(Outcome::Unsure, dirstate_node)?
692 } else if dirstate_node.has_copy_source()
692 } else if dirstate_node.has_copy_source()
693 || entry.is_from_other_parent()
693 || entry.is_from_other_parent()
694 || (size >= 0 && (size_changed || mode_changed()))
694 || (size >= 0 && (size_changed || mode_changed()))
695 {
695 {
696 self.push_outcome(Outcome::Modified, dirstate_node)?
696 self.push_outcome(Outcome::Modified, dirstate_node)?
697 } else {
697 } else {
698 let mtime_looks_clean;
698 let mtime_looks_clean;
699 if let Some(dirstate_mtime) = entry.truncated_mtime() {
699 if let Some(dirstate_mtime) = entry.truncated_mtime() {
700 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
700 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
701 .expect("OS/libc does not support mtime?");
701 .expect("OS/libc does not support mtime?");
702 // There might be a change in the future if for example the
702 // There might be a change in the future if for example the
703 // internal clock become off while process run, but this is a
703 // internal clock become off while process run, but this is a
704 // case where the issues the user would face
704 // case where the issues the user would face
705 // would be a lot worse and there is nothing we
705 // would be a lot worse and there is nothing we
706 // can really do.
706 // can really do.
707 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
707 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
708 } else {
708 } else {
709 // No mtime in the dirstate entry
709 // No mtime in the dirstate entry
710 mtime_looks_clean = false
710 mtime_looks_clean = false
711 };
711 };
712 if !mtime_looks_clean {
712 if !mtime_looks_clean {
713 self.push_outcome(Outcome::Unsure, dirstate_node)?
713 self.push_outcome(Outcome::Unsure, dirstate_node)?
714 } else if self.options.list_clean {
714 } else if self.options.list_clean {
715 self.push_outcome(Outcome::Clean, dirstate_node)?
715 self.push_outcome(Outcome::Clean, dirstate_node)?
716 }
716 }
717 }
717 }
718 Ok(())
718 Ok(())
719 }
719 }
720
720
721 /// A node in the dirstate tree has no corresponding filesystem entry
721 /// A node in the dirstate tree has no corresponding filesystem entry
722 fn traverse_dirstate_only(
722 fn traverse_dirstate_only(
723 &self,
723 &self,
724 dirstate_node: NodeRef<'tree, 'on_disk>,
724 dirstate_node: NodeRef<'tree, 'on_disk>,
725 ) -> Result<(), DirstateV2ParseError> {
725 ) -> Result<(), DirstateV2ParseError> {
726 self.check_for_outdated_directory_cache(&dirstate_node)?;
726 self.check_for_outdated_directory_cache(&dirstate_node)?;
727 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
727 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
728 dirstate_node
728 dirstate_node
729 .children(self.dmap.on_disk)?
729 .children(self.dmap.on_disk)?
730 .par_iter()
730 .par_iter()
731 .map(|child_node| self.traverse_dirstate_only(child_node))
731 .map(|child_node| self.traverse_dirstate_only(child_node))
732 .collect()
732 .collect()
733 }
733 }
734
734
735 /// A node in the dirstate tree has no corresponding *file* on the
735 /// A node in the dirstate tree has no corresponding *file* on the
736 /// filesystem
736 /// filesystem
737 ///
737 ///
738 /// Does nothing on a "directory" node
738 /// Does nothing on a "directory" node
739 fn mark_removed_or_deleted_if_file(
739 fn mark_removed_or_deleted_if_file(
740 &self,
740 &self,
741 dirstate_node: &NodeRef<'tree, 'on_disk>,
741 dirstate_node: &NodeRef<'tree, 'on_disk>,
742 ) -> Result<(), DirstateV2ParseError> {
742 ) -> Result<(), DirstateV2ParseError> {
743 if let Some(entry) = dirstate_node.entry()? {
743 if let Some(entry) = dirstate_node.entry()? {
744 if !entry.any_tracked() {
744 if !entry.any_tracked() {
745 // Future-compat for when we start storing ignored and unknown
745 // Future-compat for when we start storing ignored and unknown
746 // files for caching reasons
746 // files for caching reasons
747 return Ok(());
747 return Ok(());
748 }
748 }
749 let path = dirstate_node.full_path(self.dmap.on_disk)?;
749 let path = dirstate_node.full_path(self.dmap.on_disk)?;
750 if self.matcher.matches(path) {
750 if self.matcher.matches(path) {
751 if entry.removed() {
751 if entry.removed() {
752 self.push_outcome(Outcome::Removed, dirstate_node)?
752 self.push_outcome(Outcome::Removed, dirstate_node)?
753 } else {
753 } else {
754 self.push_outcome(Outcome::Deleted, &dirstate_node)?
754 self.push_outcome(Outcome::Deleted, &dirstate_node)?
755 }
755 }
756 }
756 }
757 }
757 }
758 Ok(())
758 Ok(())
759 }
759 }
760
760
761 /// Something in the filesystem has no corresponding dirstate node
761 /// Something in the filesystem has no corresponding dirstate node
762 ///
762 ///
763 /// Returns whether that path is ignored
763 /// Returns whether that path is ignored
764 fn traverse_fs_only(
764 fn traverse_fs_only(
765 &self,
765 &self,
766 has_ignored_ancestor: bool,
766 has_ignored_ancestor: bool,
767 directory_hg_path: &HgPath,
767 directory_hg_path: &HgPath,
768 fs_entry: &DirEntry,
768 fs_entry: &DirEntry,
769 ) -> bool {
769 ) -> bool {
770 let hg_path = directory_hg_path.join(&fs_entry.base_name);
770 let hg_path = directory_hg_path.join(&fs_entry.base_name);
771 let file_type = fs_entry.metadata.file_type();
771 let file_type = fs_entry.metadata.file_type();
772 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
772 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
773 if file_type.is_dir() {
773 if file_type.is_dir() {
774 let is_ignored =
774 let is_ignored =
775 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
775 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
776 let traverse_children = if is_ignored {
776 let traverse_children = if is_ignored {
777 // Descendants of an ignored directory are all ignored
777 // Descendants of an ignored directory are all ignored
778 self.options.list_ignored
778 self.options.list_ignored
779 } else {
779 } else {
780 // Descendants of an unknown directory may be either unknown or
780 // Descendants of an unknown directory may be either unknown or
781 // ignored
781 // ignored
782 self.options.list_unknown || self.options.list_ignored
782 self.options.list_unknown || self.options.list_ignored
783 };
783 };
784 if traverse_children {
784 if traverse_children {
785 let is_at_repo_root = false;
785 let is_at_repo_root = false;
786 if let Ok(children_fs_entries) = self.read_dir(
786 if let Ok(children_fs_entries) = self.read_dir(
787 &hg_path,
787 &hg_path,
788 &fs_entry.full_path,
788 &fs_entry.full_path,
789 is_at_repo_root,
789 is_at_repo_root,
790 ) {
790 ) {
791 children_fs_entries.par_iter().for_each(|child_fs_entry| {
791 children_fs_entries.par_iter().for_each(|child_fs_entry| {
792 self.traverse_fs_only(
792 self.traverse_fs_only(
793 is_ignored,
793 is_ignored,
794 &hg_path,
794 &hg_path,
795 child_fs_entry,
795 child_fs_entry,
796 );
796 );
797 })
797 })
798 }
798 }
799 if self.options.collect_traversed_dirs {
799 if self.options.collect_traversed_dirs {
800 self.outcome.lock().unwrap().traversed.push(hg_path.into())
800 self.outcome.lock().unwrap().traversed.push(hg_path.into())
801 }
801 }
802 }
802 }
803 is_ignored
803 is_ignored
804 } else {
804 } else {
805 if file_or_symlink {
805 if file_or_symlink {
806 if self.matcher.matches(&hg_path) {
806 if self.matcher.matches(&hg_path) {
807 self.mark_unknown_or_ignored(
807 self.mark_unknown_or_ignored(
808 has_ignored_ancestor,
808 has_ignored_ancestor,
809 &BorrowedPath::InMemory(&hg_path),
809 &BorrowedPath::InMemory(&hg_path),
810 )
810 )
811 } else {
811 } else {
812 // We haven’t computed whether this path is ignored. It
812 // We haven’t computed whether this path is ignored. It
813 // might not be, and a future run of status might have a
813 // might not be, and a future run of status might have a
814 // different matcher that matches it. So treat it as not
814 // different matcher that matches it. So treat it as not
815 // ignored. That is, inhibit readdir caching of the parent
815 // ignored. That is, inhibit readdir caching of the parent
816 // directory.
816 // directory.
817 false
817 false
818 }
818 }
819 } else {
819 } else {
820 // This is neither a directory, a plain file, or a symlink.
820 // This is neither a directory, a plain file, or a symlink.
821 // Treat it like an ignored file.
821 // Treat it like an ignored file.
822 true
822 true
823 }
823 }
824 }
824 }
825 }
825 }
826
826
827 /// Returns whether that path is ignored
827 /// Returns whether that path is ignored
828 fn mark_unknown_or_ignored(
828 fn mark_unknown_or_ignored(
829 &self,
829 &self,
830 has_ignored_ancestor: bool,
830 has_ignored_ancestor: bool,
831 hg_path: &BorrowedPath<'_, 'on_disk>,
831 hg_path: &BorrowedPath<'_, 'on_disk>,
832 ) -> bool {
832 ) -> bool {
833 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
833 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
834 if is_ignored {
834 if is_ignored {
835 if self.options.list_ignored {
835 if self.options.list_ignored {
836 self.push_outcome_without_copy_source(
836 self.push_outcome_without_copy_source(
837 Outcome::Ignored,
837 Outcome::Ignored,
838 hg_path,
838 hg_path,
839 )
839 )
840 }
840 }
841 } else {
841 } else {
842 if self.options.list_unknown {
842 if self.options.list_unknown {
843 self.push_outcome_without_copy_source(
843 self.push_outcome_without_copy_source(
844 Outcome::Unknown,
844 Outcome::Unknown,
845 hg_path,
845 hg_path,
846 )
846 )
847 }
847 }
848 }
848 }
849 is_ignored
849 is_ignored
850 }
850 }
851 }
851 }
852
852
853 struct DirEntry {
853 struct DirEntry {
854 base_name: HgPathBuf,
854 base_name: HgPathBuf,
855 full_path: PathBuf,
855 full_path: PathBuf,
856 metadata: std::fs::Metadata,
856 metadata: std::fs::Metadata,
857 }
857 }
858
858
859 impl DirEntry {
859 impl DirEntry {
860 /// Returns **unsorted** entries in the given directory, with name and
860 /// Returns **unsorted** entries in the given directory, with name and
861 /// metadata.
861 /// metadata.
862 ///
862 ///
863 /// If a `.hg` sub-directory is encountered:
863 /// If a `.hg` sub-directory is encountered:
864 ///
864 ///
865 /// * At the repository root, ignore that sub-directory
865 /// * At the repository root, ignore that sub-directory
866 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
866 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
867 /// list instead.
867 /// list instead.
868 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
868 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
869 // `read_dir` returns a "not found" error for the empty path
869 // `read_dir` returns a "not found" error for the empty path
870 let at_cwd = path == Path::new("");
870 let at_cwd = path == Path::new("");
871 let read_dir_path = if at_cwd { Path::new(".") } else { path };
871 let read_dir_path = if at_cwd { Path::new(".") } else { path };
872 let mut results = Vec::new();
872 let mut results = Vec::new();
873 for entry in read_dir_path.read_dir()? {
873 for entry in read_dir_path.read_dir()? {
874 let entry = entry?;
874 let entry = entry?;
875 let metadata = match entry.metadata() {
875 let metadata = match entry.metadata() {
876 Ok(v) => v,
876 Ok(v) => v,
877 Err(e) => {
877 Err(e) => {
878 // race with file deletion?
878 // race with file deletion?
879 if e.kind() == std::io::ErrorKind::NotFound {
879 if e.kind() == std::io::ErrorKind::NotFound {
880 continue;
880 continue;
881 } else {
881 } else {
882 return Err(e);
882 return Err(e);
883 }
883 }
884 }
884 }
885 };
885 };
886 let file_name = entry.file_name();
886 let file_name = entry.file_name();
887 // FIXME don't do this when cached
887 // FIXME don't do this when cached
888 if file_name == ".hg" {
888 if file_name == ".hg" {
889 if is_at_repo_root {
889 if is_at_repo_root {
890 // Skip the repo’s own .hg (might be a symlink)
890 // Skip the repo’s own .hg (might be a symlink)
891 continue;
891 continue;
892 } else if metadata.is_dir() {
892 } else if metadata.is_dir() {
893 // A .hg sub-directory at another location means a subrepo,
893 // A .hg sub-directory at another location means a subrepo,
894 // skip it entirely.
894 // skip it entirely.
895 return Ok(Vec::new());
895 return Ok(Vec::new());
896 }
896 }
897 }
897 }
898 let full_path = if at_cwd {
898 let full_path = if at_cwd {
899 file_name.clone().into()
899 file_name.clone().into()
900 } else {
900 } else {
901 entry.path()
901 entry.path()
902 };
902 };
903 let base_name = get_bytes_from_os_string(file_name).into();
903 let base_name = get_bytes_from_os_string(file_name).into();
904 results.push(DirEntry {
904 results.push(DirEntry {
905 base_name,
905 base_name,
906 full_path,
906 full_path,
907 metadata,
907 metadata,
908 })
908 })
909 }
909 }
910 Ok(results)
910 Ok(results)
911 }
911 }
912 }
912 }
913
913
914 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
914 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
915 /// of the give repository.
915 /// of the give repository.
916 ///
916 ///
917 /// This is similar to `SystemTime::now()`, with the result truncated to the
917 /// This is similar to `SystemTime::now()`, with the result truncated to the
918 /// same time resolution as other files’ modification times. Using `.hg`
918 /// same time resolution as other files’ modification times. Using `.hg`
919 /// instead of the system’s default temporary directory (such as `/tmp`) makes
919 /// instead of the system’s default temporary directory (such as `/tmp`) makes
920 /// it more likely the temporary file is in the same disk partition as contents
920 /// it more likely the temporary file is in the same disk partition as contents
921 /// of the working directory, which can matter since different filesystems may
921 /// of the working directory, which can matter since different filesystems may
922 /// store timestamps with different resolutions.
922 /// store timestamps with different resolutions.
923 ///
923 ///
924 /// This may fail, typically if we lack write permissions. In that case we
924 /// This may fail, typically if we lack write permissions. In that case we
925 /// should continue the `status()` algoritm anyway and consider the current
925 /// should continue the `status()` algoritm anyway and consider the current
926 /// date/time to be unknown.
926 /// date/time to be unknown.
927 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
927 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
928 tempfile::tempfile_in(repo_root.join(".hg"))?
928 tempfile::tempfile_in(repo_root.join(".hg"))?
929 .metadata()?
929 .metadata()?
930 .modified()
930 .modified()
931 }
931 }
General Comments 0
You need to be logged in to leave comments. Login now