##// END OF EJS Templates
rust-status: query fs traversal metadata lazily...
Raphaël Gomès -
r50459:da48f170 default
parent child Browse files
Show More
@@ -1,903 +1,975 b''
1 use crate::dirstate::entry::TruncatedTimestamp;
1 use crate::dirstate::entry::TruncatedTimestamp;
2 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate::status::IgnoreFnType;
3 use crate::dirstate::status::StatusPath;
3 use crate::dirstate::status::StatusPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 use crate::matchers::get_ignore_function;
10 use crate::matchers::get_ignore_function;
11 use crate::matchers::Matcher;
11 use crate::matchers::Matcher;
12 use crate::utils::files::get_bytes_from_os_string;
12 use crate::utils::files::get_bytes_from_os_string;
13 use crate::utils::files::get_path_from_bytes;
13 use crate::utils::files::get_path_from_bytes;
14 use crate::utils::hg_path::HgPath;
14 use crate::utils::hg_path::HgPath;
15 use crate::BadMatch;
15 use crate::BadMatch;
16 use crate::DirstateStatus;
16 use crate::DirstateStatus;
17 use crate::HgPathBuf;
18 use crate::HgPathCow;
17 use crate::HgPathCow;
19 use crate::PatternFileWarning;
18 use crate::PatternFileWarning;
20 use crate::StatusError;
19 use crate::StatusError;
21 use crate::StatusOptions;
20 use crate::StatusOptions;
22 use micro_timer::timed;
21 use micro_timer::timed;
23 use once_cell::sync::OnceCell;
22 use once_cell::sync::OnceCell;
24 use rayon::prelude::*;
23 use rayon::prelude::*;
25 use sha1::{Digest, Sha1};
24 use sha1::{Digest, Sha1};
26 use std::borrow::Cow;
25 use std::borrow::Cow;
26 use std::convert::TryFrom;
27 use std::convert::TryInto;
27 use std::io;
28 use std::io;
28 use std::path::Path;
29 use std::path::Path;
29 use std::path::PathBuf;
30 use std::path::PathBuf;
30 use std::sync::Mutex;
31 use std::sync::Mutex;
31 use std::time::SystemTime;
32 use std::time::SystemTime;
32
33
33 /// Returns the status of the working directory compared to its parent
34 /// Returns the status of the working directory compared to its parent
34 /// changeset.
35 /// changeset.
35 ///
36 ///
36 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 /// and variable names) and dirstate tree at the same time. The core of this
38 /// and variable names) and dirstate tree at the same time. The core of this
38 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 /// exists in one of the two trees, depending on information requested by
41 /// exists in one of the two trees, depending on information requested by
41 /// `options` we may need to traverse the remaining subtree.
42 /// `options` we may need to traverse the remaining subtree.
42 #[timed]
43 #[timed]
43 pub fn status<'dirstate>(
44 pub fn status<'dirstate>(
44 dmap: &'dirstate mut DirstateMap,
45 dmap: &'dirstate mut DirstateMap,
45 matcher: &(dyn Matcher + Sync),
46 matcher: &(dyn Matcher + Sync),
46 root_dir: PathBuf,
47 root_dir: PathBuf,
47 ignore_files: Vec<PathBuf>,
48 ignore_files: Vec<PathBuf>,
48 options: StatusOptions,
49 options: StatusOptions,
49 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
50 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
50 {
51 {
51 // Force the global rayon threadpool to not exceed 16 concurrent threads.
52 // Force the global rayon threadpool to not exceed 16 concurrent threads.
52 // This is a stop-gap measure until we figure out why using more than 16
53 // This is a stop-gap measure until we figure out why using more than 16
53 // threads makes `status` slower for each additional thread.
54 // threads makes `status` slower for each additional thread.
54 // We use `ok()` in case the global threadpool has already been
55 // We use `ok()` in case the global threadpool has already been
55 // instantiated in `rhg` or some other caller.
56 // instantiated in `rhg` or some other caller.
56 // TODO find the underlying cause and fix it, then remove this.
57 // TODO find the underlying cause and fix it, then remove this.
57 rayon::ThreadPoolBuilder::new()
58 rayon::ThreadPoolBuilder::new()
58 .num_threads(16)
59 .num_threads(16)
59 .build_global()
60 .build_global()
60 .ok();
61 .ok();
61
62
62 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
63 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
63 if options.list_ignored || options.list_unknown {
64 if options.list_ignored || options.list_unknown {
64 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
65 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
65 DirstateVersion::V1 => {
66 DirstateVersion::V1 => {
66 let (ignore_fn, warnings) = get_ignore_function(
67 let (ignore_fn, warnings) = get_ignore_function(
67 ignore_files,
68 ignore_files,
68 &root_dir,
69 &root_dir,
69 &mut |_pattern_bytes| {},
70 &mut |_pattern_bytes| {},
70 )?;
71 )?;
71 (ignore_fn, warnings, None)
72 (ignore_fn, warnings, None)
72 }
73 }
73 DirstateVersion::V2 => {
74 DirstateVersion::V2 => {
74 let mut hasher = Sha1::new();
75 let mut hasher = Sha1::new();
75 let (ignore_fn, warnings) = get_ignore_function(
76 let (ignore_fn, warnings) = get_ignore_function(
76 ignore_files,
77 ignore_files,
77 &root_dir,
78 &root_dir,
78 &mut |pattern_bytes| hasher.update(pattern_bytes),
79 &mut |pattern_bytes| hasher.update(pattern_bytes),
79 )?;
80 )?;
80 let new_hash = *hasher.finalize().as_ref();
81 let new_hash = *hasher.finalize().as_ref();
81 let changed = new_hash != dmap.ignore_patterns_hash;
82 let changed = new_hash != dmap.ignore_patterns_hash;
82 dmap.ignore_patterns_hash = new_hash;
83 dmap.ignore_patterns_hash = new_hash;
83 (ignore_fn, warnings, Some(changed))
84 (ignore_fn, warnings, Some(changed))
84 }
85 }
85 };
86 };
86 (ignore_fn, warnings, changed)
87 (ignore_fn, warnings, changed)
87 } else {
88 } else {
88 (Box::new(|&_| true), vec![], None)
89 (Box::new(|&_| true), vec![], None)
89 };
90 };
90
91
91 let filesystem_time_at_status_start =
92 let filesystem_time_at_status_start =
92 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
93 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
93
94
94 // If the repository is under the current directory, prefer using a
95 // If the repository is under the current directory, prefer using a
95 // relative path, so the kernel needs to traverse fewer directory in every
96 // relative path, so the kernel needs to traverse fewer directory in every
96 // call to `read_dir` or `symlink_metadata`.
97 // call to `read_dir` or `symlink_metadata`.
97 // This is effective in the common case where the current directory is the
98 // This is effective in the common case where the current directory is the
98 // repository root.
99 // repository root.
99
100
100 // TODO: Better yet would be to use libc functions like `openat` and
101 // TODO: Better yet would be to use libc functions like `openat` and
101 // `fstatat` to remove such repeated traversals entirely, but the standard
102 // `fstatat` to remove such repeated traversals entirely, but the standard
102 // library does not provide APIs based on those.
103 // library does not provide APIs based on those.
103 // Maybe with a crate like https://crates.io/crates/openat instead?
104 // Maybe with a crate like https://crates.io/crates/openat instead?
104 let root_dir = if let Some(relative) = std::env::current_dir()
105 let root_dir = if let Some(relative) = std::env::current_dir()
105 .ok()
106 .ok()
106 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
107 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
107 {
108 {
108 relative
109 relative
109 } else {
110 } else {
110 &root_dir
111 &root_dir
111 };
112 };
112
113
113 let outcome = DirstateStatus {
114 let outcome = DirstateStatus {
114 filesystem_time_at_status_start,
115 filesystem_time_at_status_start,
115 ..Default::default()
116 ..Default::default()
116 };
117 };
117 let common = StatusCommon {
118 let common = StatusCommon {
118 dmap,
119 dmap,
119 options,
120 options,
120 matcher,
121 matcher,
121 ignore_fn,
122 ignore_fn,
122 outcome: Mutex::new(outcome),
123 outcome: Mutex::new(outcome),
123 ignore_patterns_have_changed: patterns_changed,
124 ignore_patterns_have_changed: patterns_changed,
124 new_cachable_directories: Default::default(),
125 new_cachable_directories: Default::default(),
125 outated_cached_directories: Default::default(),
126 outated_cached_directories: Default::default(),
126 filesystem_time_at_status_start,
127 filesystem_time_at_status_start,
127 };
128 };
128 let is_at_repo_root = true;
129 let is_at_repo_root = true;
129 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
130 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
130 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
131 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
131 let root_cached_mtime = None;
132 let root_cached_mtime = None;
132 let root_dir_metadata = None;
133 // If the path we have for the repository root is a symlink, do follow it.
133 // If the path we have for the repository root is a symlink, do follow it.
134 // (As opposed to symlinks within the working directory which are not
134 // (As opposed to symlinks within the working directory which are not
135 // followed, using `std::fs::symlink_metadata`.)
135 // followed, using `std::fs::symlink_metadata`.)
136 common.traverse_fs_directory_and_dirstate(
136 common.traverse_fs_directory_and_dirstate(
137 &has_ignored_ancestor,
137 &has_ignored_ancestor,
138 dmap.root.as_ref(),
138 dmap.root.as_ref(),
139 hg_path,
139 hg_path,
140 &root_dir,
140 &DirEntry {
141 root_dir_metadata,
141 hg_path: Cow::Borrowed(HgPath::new(b"")),
142 fs_path: Cow::Borrowed(&root_dir),
143 symlink_metadata: None,
144 file_type: FakeFileType::Directory,
145 },
142 root_cached_mtime,
146 root_cached_mtime,
143 is_at_repo_root,
147 is_at_repo_root,
144 )?;
148 )?;
145 let mut outcome = common.outcome.into_inner().unwrap();
149 let mut outcome = common.outcome.into_inner().unwrap();
146 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
150 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
147 let outdated = common.outated_cached_directories.into_inner().unwrap();
151 let outdated = common.outated_cached_directories.into_inner().unwrap();
148
152
149 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
153 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
150 || !outdated.is_empty()
154 || !outdated.is_empty()
151 || (!new_cachable.is_empty()
155 || (!new_cachable.is_empty()
152 && dmap.dirstate_version == DirstateVersion::V2);
156 && dmap.dirstate_version == DirstateVersion::V2);
153
157
154 // Remove outdated mtimes before adding new mtimes, in case a given
158 // Remove outdated mtimes before adding new mtimes, in case a given
155 // directory is both
159 // directory is both
156 for path in &outdated {
160 for path in &outdated {
157 dmap.clear_cached_mtime(path)?;
161 dmap.clear_cached_mtime(path)?;
158 }
162 }
159 for (path, mtime) in &new_cachable {
163 for (path, mtime) in &new_cachable {
160 dmap.set_cached_mtime(path, *mtime)?;
164 dmap.set_cached_mtime(path, *mtime)?;
161 }
165 }
162
166
163 Ok((outcome, warnings))
167 Ok((outcome, warnings))
164 }
168 }
165
169
166 /// Bag of random things needed by various parts of the algorithm. Reduces the
170 /// Bag of random things needed by various parts of the algorithm. Reduces the
167 /// number of parameters passed to functions.
171 /// number of parameters passed to functions.
168 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
172 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
169 dmap: &'tree DirstateMap<'on_disk>,
173 dmap: &'tree DirstateMap<'on_disk>,
170 options: StatusOptions,
174 options: StatusOptions,
171 matcher: &'a (dyn Matcher + Sync),
175 matcher: &'a (dyn Matcher + Sync),
172 ignore_fn: IgnoreFnType<'a>,
176 ignore_fn: IgnoreFnType<'a>,
173 outcome: Mutex<DirstateStatus<'on_disk>>,
177 outcome: Mutex<DirstateStatus<'on_disk>>,
174 new_cachable_directories:
178 new_cachable_directories:
175 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
179 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
176 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
180 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
177
181
178 /// Whether ignore files like `.hgignore` have changed since the previous
182 /// Whether ignore files like `.hgignore` have changed since the previous
179 /// time a `status()` call wrote their hash to the dirstate. `None` means
183 /// time a `status()` call wrote their hash to the dirstate. `None` means
180 /// we don’t know as this run doesn’t list either ignored or uknown files
184 /// we don’t know as this run doesn’t list either ignored or uknown files
181 /// and therefore isn’t reading `.hgignore`.
185 /// and therefore isn’t reading `.hgignore`.
182 ignore_patterns_have_changed: Option<bool>,
186 ignore_patterns_have_changed: Option<bool>,
183
187
184 /// The current time at the start of the `status()` algorithm, as measured
188 /// The current time at the start of the `status()` algorithm, as measured
185 /// and possibly truncated by the filesystem.
189 /// and possibly truncated by the filesystem.
186 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
190 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
187 }
191 }
188
192
189 enum Outcome {
193 enum Outcome {
190 Modified,
194 Modified,
191 Added,
195 Added,
192 Removed,
196 Removed,
193 Deleted,
197 Deleted,
194 Clean,
198 Clean,
195 Ignored,
199 Ignored,
196 Unknown,
200 Unknown,
197 Unsure,
201 Unsure,
198 }
202 }
199
203
200 /// Lazy computation of whether a given path has a hgignored
204 /// Lazy computation of whether a given path has a hgignored
201 /// ancestor.
205 /// ancestor.
202 struct HasIgnoredAncestor<'a> {
206 struct HasIgnoredAncestor<'a> {
203 /// `path` and `parent` constitute the inputs to the computation,
207 /// `path` and `parent` constitute the inputs to the computation,
204 /// `cache` stores the outcome.
208 /// `cache` stores the outcome.
205 path: &'a HgPath,
209 path: &'a HgPath,
206 parent: Option<&'a HasIgnoredAncestor<'a>>,
210 parent: Option<&'a HasIgnoredAncestor<'a>>,
207 cache: OnceCell<bool>,
211 cache: OnceCell<bool>,
208 }
212 }
209
213
210 impl<'a> HasIgnoredAncestor<'a> {
214 impl<'a> HasIgnoredAncestor<'a> {
211 fn create(
215 fn create(
212 parent: Option<&'a HasIgnoredAncestor<'a>>,
216 parent: Option<&'a HasIgnoredAncestor<'a>>,
213 path: &'a HgPath,
217 path: &'a HgPath,
214 ) -> HasIgnoredAncestor<'a> {
218 ) -> HasIgnoredAncestor<'a> {
215 Self {
219 Self {
216 path,
220 path,
217 parent,
221 parent,
218 cache: OnceCell::new(),
222 cache: OnceCell::new(),
219 }
223 }
220 }
224 }
221
225
222 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
226 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
223 match self.parent {
227 match self.parent {
224 None => false,
228 None => false,
225 Some(parent) => {
229 Some(parent) => {
226 *(parent.cache.get_or_init(|| {
230 *(parent.cache.get_or_init(|| {
227 parent.force(ignore_fn) || ignore_fn(&self.path)
231 parent.force(ignore_fn) || ignore_fn(&self.path)
228 }))
232 }))
229 }
233 }
230 }
234 }
231 }
235 }
232 }
236 }
233
237
234 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
238 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
235 fn push_outcome(
239 fn push_outcome(
236 &self,
240 &self,
237 which: Outcome,
241 which: Outcome,
238 dirstate_node: &NodeRef<'tree, 'on_disk>,
242 dirstate_node: &NodeRef<'tree, 'on_disk>,
239 ) -> Result<(), DirstateV2ParseError> {
243 ) -> Result<(), DirstateV2ParseError> {
240 let path = dirstate_node
244 let path = dirstate_node
241 .full_path_borrowed(self.dmap.on_disk)?
245 .full_path_borrowed(self.dmap.on_disk)?
242 .detach_from_tree();
246 .detach_from_tree();
243 let copy_source = if self.options.list_copies {
247 let copy_source = if self.options.list_copies {
244 dirstate_node
248 dirstate_node
245 .copy_source_borrowed(self.dmap.on_disk)?
249 .copy_source_borrowed(self.dmap.on_disk)?
246 .map(|source| source.detach_from_tree())
250 .map(|source| source.detach_from_tree())
247 } else {
251 } else {
248 None
252 None
249 };
253 };
250 self.push_outcome_common(which, path, copy_source);
254 self.push_outcome_common(which, path, copy_source);
251 Ok(())
255 Ok(())
252 }
256 }
253
257
254 fn push_outcome_without_copy_source(
258 fn push_outcome_without_copy_source(
255 &self,
259 &self,
256 which: Outcome,
260 which: Outcome,
257 path: &BorrowedPath<'_, 'on_disk>,
261 path: &BorrowedPath<'_, 'on_disk>,
258 ) {
262 ) {
259 self.push_outcome_common(which, path.detach_from_tree(), None)
263 self.push_outcome_common(which, path.detach_from_tree(), None)
260 }
264 }
261
265
262 fn push_outcome_common(
266 fn push_outcome_common(
263 &self,
267 &self,
264 which: Outcome,
268 which: Outcome,
265 path: HgPathCow<'on_disk>,
269 path: HgPathCow<'on_disk>,
266 copy_source: Option<HgPathCow<'on_disk>>,
270 copy_source: Option<HgPathCow<'on_disk>>,
267 ) {
271 ) {
268 let mut outcome = self.outcome.lock().unwrap();
272 let mut outcome = self.outcome.lock().unwrap();
269 let vec = match which {
273 let vec = match which {
270 Outcome::Modified => &mut outcome.modified,
274 Outcome::Modified => &mut outcome.modified,
271 Outcome::Added => &mut outcome.added,
275 Outcome::Added => &mut outcome.added,
272 Outcome::Removed => &mut outcome.removed,
276 Outcome::Removed => &mut outcome.removed,
273 Outcome::Deleted => &mut outcome.deleted,
277 Outcome::Deleted => &mut outcome.deleted,
274 Outcome::Clean => &mut outcome.clean,
278 Outcome::Clean => &mut outcome.clean,
275 Outcome::Ignored => &mut outcome.ignored,
279 Outcome::Ignored => &mut outcome.ignored,
276 Outcome::Unknown => &mut outcome.unknown,
280 Outcome::Unknown => &mut outcome.unknown,
277 Outcome::Unsure => &mut outcome.unsure,
281 Outcome::Unsure => &mut outcome.unsure,
278 };
282 };
279 vec.push(StatusPath { path, copy_source });
283 vec.push(StatusPath { path, copy_source });
280 }
284 }
281
285
282 fn read_dir(
286 fn read_dir(
283 &self,
287 &self,
284 hg_path: &HgPath,
288 hg_path: &HgPath,
285 fs_path: &Path,
289 fs_path: &Path,
286 is_at_repo_root: bool,
290 is_at_repo_root: bool,
287 ) -> Result<Vec<DirEntry>, ()> {
291 ) -> Result<Vec<DirEntry>, ()> {
288 DirEntry::read_dir(fs_path, is_at_repo_root)
292 DirEntry::read_dir(fs_path, is_at_repo_root)
289 .map_err(|error| self.io_error(error, hg_path))
293 .map_err(|error| self.io_error(error, hg_path))
290 }
294 }
291
295
292 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
296 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
293 let errno = error.raw_os_error().expect("expected real OS error");
297 let errno = error.raw_os_error().expect("expected real OS error");
294 self.outcome
298 self.outcome
295 .lock()
299 .lock()
296 .unwrap()
300 .unwrap()
297 .bad
301 .bad
298 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
302 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
299 }
303 }
300
304
301 fn check_for_outdated_directory_cache(
305 fn check_for_outdated_directory_cache(
302 &self,
306 &self,
303 dirstate_node: &NodeRef<'tree, 'on_disk>,
307 dirstate_node: &NodeRef<'tree, 'on_disk>,
304 ) -> Result<(), DirstateV2ParseError> {
308 ) -> Result<(), DirstateV2ParseError> {
305 if self.ignore_patterns_have_changed == Some(true)
309 if self.ignore_patterns_have_changed == Some(true)
306 && dirstate_node.cached_directory_mtime()?.is_some()
310 && dirstate_node.cached_directory_mtime()?.is_some()
307 {
311 {
308 self.outated_cached_directories.lock().unwrap().push(
312 self.outated_cached_directories.lock().unwrap().push(
309 dirstate_node
313 dirstate_node
310 .full_path_borrowed(self.dmap.on_disk)?
314 .full_path_borrowed(self.dmap.on_disk)?
311 .detach_from_tree(),
315 .detach_from_tree(),
312 )
316 )
313 }
317 }
314 Ok(())
318 Ok(())
315 }
319 }
316
320
317 /// If this returns true, we can get accurate results by only using
321 /// If this returns true, we can get accurate results by only using
318 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
322 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
319 /// need to call `read_dir`.
323 /// need to call `read_dir`.
320 fn can_skip_fs_readdir(
324 fn can_skip_fs_readdir(
321 &self,
325 &self,
322 directory_metadata: Option<&std::fs::Metadata>,
326 directory_entry: &DirEntry,
323 cached_directory_mtime: Option<TruncatedTimestamp>,
327 cached_directory_mtime: Option<TruncatedTimestamp>,
324 ) -> bool {
328 ) -> bool {
325 if !self.options.list_unknown && !self.options.list_ignored {
329 if !self.options.list_unknown && !self.options.list_ignored {
326 // All states that we care about listing have corresponding
330 // All states that we care about listing have corresponding
327 // dirstate entries.
331 // dirstate entries.
328 // This happens for example with `hg status -mard`.
332 // This happens for example with `hg status -mard`.
329 return true;
333 return true;
330 }
334 }
331 if !self.options.list_ignored
335 if !self.options.list_ignored
332 && self.ignore_patterns_have_changed == Some(false)
336 && self.ignore_patterns_have_changed == Some(false)
333 {
337 {
334 if let Some(cached_mtime) = cached_directory_mtime {
338 if let Some(cached_mtime) = cached_directory_mtime {
335 // The dirstate contains a cached mtime for this directory, set
339 // The dirstate contains a cached mtime for this directory, set
336 // by a previous run of the `status` algorithm which found this
340 // by a previous run of the `status` algorithm which found this
337 // directory eligible for `read_dir` caching.
341 // directory eligible for `read_dir` caching.
338 if let Some(meta) = directory_metadata {
342 if let Ok(meta) = directory_entry.symlink_metadata() {
339 if cached_mtime
343 if cached_mtime
340 .likely_equal_to_mtime_of(meta)
344 .likely_equal_to_mtime_of(&meta)
341 .unwrap_or(false)
345 .unwrap_or(false)
342 {
346 {
343 // The mtime of that directory has not changed
347 // The mtime of that directory has not changed
344 // since then, which means that the results of
348 // since then, which means that the results of
345 // `read_dir` should also be unchanged.
349 // `read_dir` should also be unchanged.
346 return true;
350 return true;
347 }
351 }
348 }
352 }
349 }
353 }
350 }
354 }
351 false
355 false
352 }
356 }
353
357
354 /// Returns whether all child entries of the filesystem directory have a
358 /// Returns whether all child entries of the filesystem directory have a
355 /// corresponding dirstate node or are ignored.
359 /// corresponding dirstate node or are ignored.
356 fn traverse_fs_directory_and_dirstate<'ancestor>(
360 fn traverse_fs_directory_and_dirstate<'ancestor>(
357 &self,
361 &self,
358 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
362 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
359 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
363 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
360 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
364 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
361 directory_fs_path: &Path,
365 directory_entry: &DirEntry,
362 directory_metadata: Option<&std::fs::Metadata>,
363 cached_directory_mtime: Option<TruncatedTimestamp>,
366 cached_directory_mtime: Option<TruncatedTimestamp>,
364 is_at_repo_root: bool,
367 is_at_repo_root: bool,
365 ) -> Result<bool, DirstateV2ParseError> {
368 ) -> Result<bool, DirstateV2ParseError> {
366 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
369 if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
367 {
368 dirstate_nodes
370 dirstate_nodes
369 .par_iter()
371 .par_iter()
370 .map(|dirstate_node| {
372 .map(|dirstate_node| {
371 let fs_path = directory_fs_path.join(get_path_from_bytes(
373 let fs_path = &directory_entry.fs_path;
374 let fs_path = fs_path.join(get_path_from_bytes(
372 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
375 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
373 ));
376 ));
374 match std::fs::symlink_metadata(&fs_path) {
377 match std::fs::symlink_metadata(&fs_path) {
375 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
378 Ok(fs_metadata) => {
376 &fs_path,
379 let file_type =
377 &fs_metadata,
380 match fs_metadata.file_type().try_into() {
381 Ok(file_type) => file_type,
382 Err(_) => return Ok(()),
383 };
384 let entry = DirEntry {
385 hg_path: Cow::Borrowed(
386 dirstate_node
387 .full_path(&self.dmap.on_disk)?,
388 ),
389 fs_path: Cow::Borrowed(&fs_path),
390 symlink_metadata: Some(fs_metadata),
391 file_type,
392 };
393 self.traverse_fs_and_dirstate(
394 &entry,
378 dirstate_node,
395 dirstate_node,
379 has_ignored_ancestor,
396 has_ignored_ancestor,
380 ),
397 )
398 }
381 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
399 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
382 self.traverse_dirstate_only(dirstate_node)
400 self.traverse_dirstate_only(dirstate_node)
383 }
401 }
384 Err(error) => {
402 Err(error) => {
385 let hg_path =
403 let hg_path =
386 dirstate_node.full_path(self.dmap.on_disk)?;
404 dirstate_node.full_path(self.dmap.on_disk)?;
387 Ok(self.io_error(error, hg_path))
405 Ok(self.io_error(error, hg_path))
388 }
406 }
389 }
407 }
390 })
408 })
391 .collect::<Result<_, _>>()?;
409 .collect::<Result<_, _>>()?;
392
410
393 // We don’t know, so conservatively say this isn’t the case
411 // We don’t know, so conservatively say this isn’t the case
394 let children_all_have_dirstate_node_or_are_ignored = false;
412 let children_all_have_dirstate_node_or_are_ignored = false;
395
413
396 return Ok(children_all_have_dirstate_node_or_are_ignored);
414 return Ok(children_all_have_dirstate_node_or_are_ignored);
397 }
415 }
398
416
399 let mut fs_entries = if let Ok(entries) = self.read_dir(
417 let mut fs_entries = if let Ok(entries) = self.read_dir(
400 directory_hg_path,
418 directory_hg_path,
401 directory_fs_path,
419 &directory_entry.fs_path,
402 is_at_repo_root,
420 is_at_repo_root,
403 ) {
421 ) {
404 entries
422 entries
405 } else {
423 } else {
406 // Treat an unreadable directory (typically because of insufficient
424 // Treat an unreadable directory (typically because of insufficient
407 // permissions) like an empty directory. `self.read_dir` has
425 // permissions) like an empty directory. `self.read_dir` has
408 // already called `self.io_error` so a warning will be emitted.
426 // already called `self.io_error` so a warning will be emitted.
409 Vec::new()
427 Vec::new()
410 };
428 };
411
429
412 // `merge_join_by` requires both its input iterators to be sorted:
430 // `merge_join_by` requires both its input iterators to be sorted:
413
431
414 let dirstate_nodes = dirstate_nodes.sorted();
432 let dirstate_nodes = dirstate_nodes.sorted();
415 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
433 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
416 // https://github.com/rust-lang/rust/issues/34162
434 // https://github.com/rust-lang/rust/issues/34162
417 fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
435 fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
418
436
419 // Propagate here any error that would happen inside the comparison
437 // Propagate here any error that would happen inside the comparison
420 // callback below
438 // callback below
421 for dirstate_node in &dirstate_nodes {
439 for dirstate_node in &dirstate_nodes {
422 dirstate_node.base_name(self.dmap.on_disk)?;
440 dirstate_node.base_name(self.dmap.on_disk)?;
423 }
441 }
424 itertools::merge_join_by(
442 itertools::merge_join_by(
425 dirstate_nodes,
443 dirstate_nodes,
426 &fs_entries,
444 &fs_entries,
427 |dirstate_node, fs_entry| {
445 |dirstate_node, fs_entry| {
428 // This `unwrap` never panics because we already propagated
446 // This `unwrap` never panics because we already propagated
429 // those errors above
447 // those errors above
430 dirstate_node
448 dirstate_node
431 .base_name(self.dmap.on_disk)
449 .base_name(self.dmap.on_disk)
432 .unwrap()
450 .unwrap()
433 .cmp(&fs_entry.hg_path)
451 .cmp(&fs_entry.hg_path)
434 },
452 },
435 )
453 )
436 .par_bridge()
454 .par_bridge()
437 .map(|pair| {
455 .map(|pair| {
438 use itertools::EitherOrBoth::*;
456 use itertools::EitherOrBoth::*;
439 let has_dirstate_node_or_is_ignored;
457 let has_dirstate_node_or_is_ignored;
440 match pair {
458 match pair {
441 Both(dirstate_node, fs_entry) => {
459 Both(dirstate_node, fs_entry) => {
442 self.traverse_fs_and_dirstate(
460 self.traverse_fs_and_dirstate(
443 &fs_entry.fs_path,
461 &fs_entry,
444 &fs_entry.metadata,
445 dirstate_node,
462 dirstate_node,
446 has_ignored_ancestor,
463 has_ignored_ancestor,
447 )?;
464 )?;
448 has_dirstate_node_or_is_ignored = true
465 has_dirstate_node_or_is_ignored = true
449 }
466 }
450 Left(dirstate_node) => {
467 Left(dirstate_node) => {
451 self.traverse_dirstate_only(dirstate_node)?;
468 self.traverse_dirstate_only(dirstate_node)?;
452 has_dirstate_node_or_is_ignored = true;
469 has_dirstate_node_or_is_ignored = true;
453 }
470 }
454 Right(fs_entry) => {
471 Right(fs_entry) => {
455 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
472 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
456 has_ignored_ancestor.force(&self.ignore_fn),
473 has_ignored_ancestor.force(&self.ignore_fn),
457 directory_hg_path,
474 directory_hg_path,
458 fs_entry,
475 fs_entry,
459 )
476 )
460 }
477 }
461 }
478 }
462 Ok(has_dirstate_node_or_is_ignored)
479 Ok(has_dirstate_node_or_is_ignored)
463 })
480 })
464 .try_reduce(|| true, |a, b| Ok(a && b))
481 .try_reduce(|| true, |a, b| Ok(a && b))
465 }
482 }
466
483
467 fn traverse_fs_and_dirstate<'ancestor>(
484 fn traverse_fs_and_dirstate<'ancestor>(
468 &self,
485 &self,
469 fs_path: &Path,
486 fs_entry: &DirEntry,
470 fs_metadata: &std::fs::Metadata,
471 dirstate_node: NodeRef<'tree, 'on_disk>,
487 dirstate_node: NodeRef<'tree, 'on_disk>,
472 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
488 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
473 ) -> Result<(), DirstateV2ParseError> {
489 ) -> Result<(), DirstateV2ParseError> {
474 self.check_for_outdated_directory_cache(&dirstate_node)?;
490 self.check_for_outdated_directory_cache(&dirstate_node)?;
475 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
491 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
476 let file_type = fs_metadata.file_type();
492 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
477 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
478 if !file_or_symlink {
493 if !file_or_symlink {
479 // If we previously had a file here, it was removed (with
494 // If we previously had a file here, it was removed (with
480 // `hg rm` or similar) or deleted before it could be
495 // `hg rm` or similar) or deleted before it could be
481 // replaced by a directory or something else.
496 // replaced by a directory or something else.
482 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
497 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
483 }
498 }
484 if file_type.is_dir() {
499 if fs_entry.is_dir() {
485 if self.options.collect_traversed_dirs {
500 if self.options.collect_traversed_dirs {
486 self.outcome
501 self.outcome
487 .lock()
502 .lock()
488 .unwrap()
503 .unwrap()
489 .traversed
504 .traversed
490 .push(hg_path.detach_from_tree())
505 .push(hg_path.detach_from_tree())
491 }
506 }
492 let is_ignored = HasIgnoredAncestor::create(
507 let is_ignored = HasIgnoredAncestor::create(
493 Some(&has_ignored_ancestor),
508 Some(&has_ignored_ancestor),
494 hg_path,
509 hg_path,
495 );
510 );
496 let is_at_repo_root = false;
511 let is_at_repo_root = false;
497 let children_all_have_dirstate_node_or_are_ignored = self
512 let children_all_have_dirstate_node_or_are_ignored = self
498 .traverse_fs_directory_and_dirstate(
513 .traverse_fs_directory_and_dirstate(
499 &is_ignored,
514 &is_ignored,
500 dirstate_node.children(self.dmap.on_disk)?,
515 dirstate_node.children(self.dmap.on_disk)?,
501 hg_path,
516 hg_path,
502 fs_path,
517 fs_entry,
503 Some(fs_metadata),
504 dirstate_node.cached_directory_mtime()?,
518 dirstate_node.cached_directory_mtime()?,
505 is_at_repo_root,
519 is_at_repo_root,
506 )?;
520 )?;
507 self.maybe_save_directory_mtime(
521 self.maybe_save_directory_mtime(
508 children_all_have_dirstate_node_or_are_ignored,
522 children_all_have_dirstate_node_or_are_ignored,
509 fs_metadata,
523 fs_entry,
510 dirstate_node,
524 dirstate_node,
511 )?
525 )?
512 } else {
526 } else {
513 if file_or_symlink && self.matcher.matches(&hg_path) {
527 if file_or_symlink && self.matcher.matches(&hg_path) {
514 if let Some(entry) = dirstate_node.entry()? {
528 if let Some(entry) = dirstate_node.entry()? {
515 if !entry.any_tracked() {
529 if !entry.any_tracked() {
516 // Forward-compat if we start tracking unknown/ignored
530 // Forward-compat if we start tracking unknown/ignored
517 // files for caching reasons
531 // files for caching reasons
518 self.mark_unknown_or_ignored(
532 self.mark_unknown_or_ignored(
519 has_ignored_ancestor.force(&self.ignore_fn),
533 has_ignored_ancestor.force(&self.ignore_fn),
520 &hg_path,
534 &hg_path,
521 );
535 );
522 }
536 }
523 if entry.added() {
537 if entry.added() {
524 self.push_outcome(Outcome::Added, &dirstate_node)?;
538 self.push_outcome(Outcome::Added, &dirstate_node)?;
525 } else if entry.removed() {
539 } else if entry.removed() {
526 self.push_outcome(Outcome::Removed, &dirstate_node)?;
540 self.push_outcome(Outcome::Removed, &dirstate_node)?;
527 } else if entry.modified() {
541 } else if entry.modified() {
528 self.push_outcome(Outcome::Modified, &dirstate_node)?;
542 self.push_outcome(Outcome::Modified, &dirstate_node)?;
529 } else {
543 } else {
530 self.handle_normal_file(&dirstate_node, fs_metadata)?;
544 self.handle_normal_file(&dirstate_node, fs_entry)?;
531 }
545 }
532 } else {
546 } else {
533 // `node.entry.is_none()` indicates a "directory"
547 // `node.entry.is_none()` indicates a "directory"
534 // node, but the filesystem has a file
548 // node, but the filesystem has a file
535 self.mark_unknown_or_ignored(
549 self.mark_unknown_or_ignored(
536 has_ignored_ancestor.force(&self.ignore_fn),
550 has_ignored_ancestor.force(&self.ignore_fn),
537 hg_path,
551 hg_path,
538 );
552 );
539 }
553 }
540 }
554 }
541
555
542 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
556 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
543 {
557 {
544 self.traverse_dirstate_only(child_node)?
558 self.traverse_dirstate_only(child_node)?
545 }
559 }
546 }
560 }
547 Ok(())
561 Ok(())
548 }
562 }
549
563
550 fn maybe_save_directory_mtime(
564 fn maybe_save_directory_mtime(
551 &self,
565 &self,
552 children_all_have_dirstate_node_or_are_ignored: bool,
566 children_all_have_dirstate_node_or_are_ignored: bool,
553 directory_metadata: &std::fs::Metadata,
567 directory_entry: &DirEntry,
554 dirstate_node: NodeRef<'tree, 'on_disk>,
568 dirstate_node: NodeRef<'tree, 'on_disk>,
555 ) -> Result<(), DirstateV2ParseError> {
569 ) -> Result<(), DirstateV2ParseError> {
556 if !children_all_have_dirstate_node_or_are_ignored {
570 if !children_all_have_dirstate_node_or_are_ignored {
557 return Ok(());
571 return Ok(());
558 }
572 }
559 // All filesystem directory entries from `read_dir` have a
573 // All filesystem directory entries from `read_dir` have a
560 // corresponding node in the dirstate, so we can reconstitute the
574 // corresponding node in the dirstate, so we can reconstitute the
561 // names of those entries without calling `read_dir` again.
575 // names of those entries without calling `read_dir` again.
562
576
563 // TODO: use let-else here and below when available:
577 // TODO: use let-else here and below when available:
564 // https://github.com/rust-lang/rust/issues/87335
578 // https://github.com/rust-lang/rust/issues/87335
565 let status_start = if let Some(status_start) =
579 let status_start = if let Some(status_start) =
566 &self.filesystem_time_at_status_start
580 &self.filesystem_time_at_status_start
567 {
581 {
568 status_start
582 status_start
569 } else {
583 } else {
570 return Ok(());
584 return Ok(());
571 };
585 };
572
586
573 // Although the Rust standard library’s `SystemTime` type
587 // Although the Rust standard library’s `SystemTime` type
574 // has nanosecond precision, the times reported for a
588 // has nanosecond precision, the times reported for a
575 // directory’s (or file’s) modified time may have lower
589 // directory’s (or file’s) modified time may have lower
576 // resolution based on the filesystem (for example ext3
590 // resolution based on the filesystem (for example ext3
577 // only stores integer seconds), kernel (see
591 // only stores integer seconds), kernel (see
578 // https://stackoverflow.com/a/14393315/1162888), etc.
592 // https://stackoverflow.com/a/14393315/1162888), etc.
593 let metadata = match directory_entry.symlink_metadata() {
594 Ok(meta) => meta,
595 Err(_) => return Ok(()),
596 };
579 let directory_mtime = if let Ok(option) =
597 let directory_mtime = if let Ok(option) =
580 TruncatedTimestamp::for_reliable_mtime_of(
598 TruncatedTimestamp::for_reliable_mtime_of(&metadata, status_start)
581 directory_metadata,
599 {
582 status_start,
583 ) {
584 if let Some(directory_mtime) = option {
600 if let Some(directory_mtime) = option {
585 directory_mtime
601 directory_mtime
586 } else {
602 } else {
587 // The directory was modified too recently,
603 // The directory was modified too recently,
588 // don’t cache its `read_dir` results.
604 // don’t cache its `read_dir` results.
589 //
605 //
590 // 1. A change to this directory (direct child was
606 // 1. A change to this directory (direct child was
591 // added or removed) causes its mtime to be set
607 // added or removed) causes its mtime to be set
592 // (possibly truncated) to `directory_mtime`
608 // (possibly truncated) to `directory_mtime`
593 // 2. This `status` algorithm calls `read_dir`
609 // 2. This `status` algorithm calls `read_dir`
594 // 3. Another change is made to the same directory,
610 // 3. Another change is made to the same directory,
595 // so that calling `read_dir` again would give
611 // so that calling `read_dir` again would give
596 // different results, but soon enough after 1. that
612 // different results, but soon enough after 1. that
597 // the mtime stays the same
613 // the mtime stays the same
598 //
614 //
599 // On a system where the time resolution is poor, this
615 // On a system where the time resolution is poor, this
600 // scenario is not unlikely if all three steps are caused
616 // scenario is not unlikely if all three steps are caused
601 // by the same script.
617 // by the same script.
602 return Ok(());
618 return Ok(());
603 }
619 }
604 } else {
620 } else {
605 // OS/libc does not support mtime?
621 // OS/libc does not support mtime?
606 return Ok(());
622 return Ok(());
607 };
623 };
608 // We’ve observed (through `status_start`) that time has
624 // We’ve observed (through `status_start`) that time has
609 // “progressed” since `directory_mtime`, so any further
625 // “progressed” since `directory_mtime`, so any further
610 // change to this directory is extremely likely to cause a
626 // change to this directory is extremely likely to cause a
611 // different mtime.
627 // different mtime.
612 //
628 //
613 // Having the same mtime again is not entirely impossible
629 // Having the same mtime again is not entirely impossible
614 // since the system clock is not monotonic. It could jump
630 // since the system clock is not monotonic. It could jump
615 // backward to some point before `directory_mtime`, then a
631 // backward to some point before `directory_mtime`, then a
616 // directory change could potentially happen during exactly
632 // directory change could potentially happen during exactly
617 // the wrong tick.
633 // the wrong tick.
618 //
634 //
619 // We deem this scenario (unlike the previous one) to be
635 // We deem this scenario (unlike the previous one) to be
620 // unlikely enough in practice.
636 // unlikely enough in practice.
621
637
622 let is_up_to_date =
638 let is_up_to_date =
623 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
639 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
624 cached.likely_equal(directory_mtime)
640 cached.likely_equal(directory_mtime)
625 } else {
641 } else {
626 false
642 false
627 };
643 };
628 if !is_up_to_date {
644 if !is_up_to_date {
629 let hg_path = dirstate_node
645 let hg_path = dirstate_node
630 .full_path_borrowed(self.dmap.on_disk)?
646 .full_path_borrowed(self.dmap.on_disk)?
631 .detach_from_tree();
647 .detach_from_tree();
632 self.new_cachable_directories
648 self.new_cachable_directories
633 .lock()
649 .lock()
634 .unwrap()
650 .unwrap()
635 .push((hg_path, directory_mtime))
651 .push((hg_path, directory_mtime))
636 }
652 }
637 Ok(())
653 Ok(())
638 }
654 }
639
655
640 /// A file that is clean in the dirstate was found in the filesystem
656 /// A file that is clean in the dirstate was found in the filesystem
641 fn handle_normal_file(
657 fn handle_normal_file(
642 &self,
658 &self,
643 dirstate_node: &NodeRef<'tree, 'on_disk>,
659 dirstate_node: &NodeRef<'tree, 'on_disk>,
644 fs_metadata: &std::fs::Metadata,
660 fs_entry: &DirEntry,
645 ) -> Result<(), DirstateV2ParseError> {
661 ) -> Result<(), DirstateV2ParseError> {
646 // Keep the low 31 bits
662 // Keep the low 31 bits
647 fn truncate_u64(value: u64) -> i32 {
663 fn truncate_u64(value: u64) -> i32 {
648 (value & 0x7FFF_FFFF) as i32
664 (value & 0x7FFF_FFFF) as i32
649 }
665 }
650
666
667 let fs_metadata = match fs_entry.symlink_metadata() {
668 Ok(meta) => meta,
669 Err(_) => return Ok(()),
670 };
671
651 let entry = dirstate_node
672 let entry = dirstate_node
652 .entry()?
673 .entry()?
653 .expect("handle_normal_file called with entry-less node");
674 .expect("handle_normal_file called with entry-less node");
654 let mode_changed =
675 let mode_changed =
655 || self.options.check_exec && entry.mode_changed(fs_metadata);
676 || self.options.check_exec && entry.mode_changed(&fs_metadata);
656 let size = entry.size();
677 let size = entry.size();
657 let size_changed = size != truncate_u64(fs_metadata.len());
678 let size_changed = size != truncate_u64(fs_metadata.len());
658 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
679 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
659 // issue6456: Size returned may be longer due to encryption
680 // issue6456: Size returned may be longer due to encryption
660 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
681 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
661 self.push_outcome(Outcome::Unsure, dirstate_node)?
682 self.push_outcome(Outcome::Unsure, dirstate_node)?
662 } else if dirstate_node.has_copy_source()
683 } else if dirstate_node.has_copy_source()
663 || entry.is_from_other_parent()
684 || entry.is_from_other_parent()
664 || (size >= 0 && (size_changed || mode_changed()))
685 || (size >= 0 && (size_changed || mode_changed()))
665 {
686 {
666 self.push_outcome(Outcome::Modified, dirstate_node)?
687 self.push_outcome(Outcome::Modified, dirstate_node)?
667 } else {
688 } else {
668 let mtime_looks_clean;
689 let mtime_looks_clean;
669 if let Some(dirstate_mtime) = entry.truncated_mtime() {
690 if let Some(dirstate_mtime) = entry.truncated_mtime() {
670 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
691 let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
671 .expect("OS/libc does not support mtime?");
692 .expect("OS/libc does not support mtime?");
672 // There might be a change in the future if for example the
693 // There might be a change in the future if for example the
673 // internal clock becomes off while the process runs, but this is a
694 // internal clock becomes off while the process runs, but this is a
674 // case where the issues the user would face
695 // case where the issues the user would face
675 // would be a lot worse and there is nothing we
696 // would be a lot worse and there is nothing we
676 // can really do.
697 // can really do.
677 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
698 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
678 } else {
699 } else {
679 // No mtime in the dirstate entry
700 // No mtime in the dirstate entry
680 mtime_looks_clean = false
701 mtime_looks_clean = false
681 };
702 };
682 if !mtime_looks_clean {
703 if !mtime_looks_clean {
683 self.push_outcome(Outcome::Unsure, dirstate_node)?
704 self.push_outcome(Outcome::Unsure, dirstate_node)?
684 } else if self.options.list_clean {
705 } else if self.options.list_clean {
685 self.push_outcome(Outcome::Clean, dirstate_node)?
706 self.push_outcome(Outcome::Clean, dirstate_node)?
686 }
707 }
687 }
708 }
688 Ok(())
709 Ok(())
689 }
710 }
690
711
691 /// A node in the dirstate tree has no corresponding filesystem entry
712 /// A node in the dirstate tree has no corresponding filesystem entry
692 fn traverse_dirstate_only(
713 fn traverse_dirstate_only(
693 &self,
714 &self,
694 dirstate_node: NodeRef<'tree, 'on_disk>,
715 dirstate_node: NodeRef<'tree, 'on_disk>,
695 ) -> Result<(), DirstateV2ParseError> {
716 ) -> Result<(), DirstateV2ParseError> {
696 self.check_for_outdated_directory_cache(&dirstate_node)?;
717 self.check_for_outdated_directory_cache(&dirstate_node)?;
697 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
718 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
698 dirstate_node
719 dirstate_node
699 .children(self.dmap.on_disk)?
720 .children(self.dmap.on_disk)?
700 .par_iter()
721 .par_iter()
701 .map(|child_node| self.traverse_dirstate_only(child_node))
722 .map(|child_node| self.traverse_dirstate_only(child_node))
702 .collect()
723 .collect()
703 }
724 }
704
725
705 /// A node in the dirstate tree has no corresponding *file* on the
726 /// A node in the dirstate tree has no corresponding *file* on the
706 /// filesystem
727 /// filesystem
707 ///
728 ///
708 /// Does nothing on a "directory" node
729 /// Does nothing on a "directory" node
709 fn mark_removed_or_deleted_if_file(
730 fn mark_removed_or_deleted_if_file(
710 &self,
731 &self,
711 dirstate_node: &NodeRef<'tree, 'on_disk>,
732 dirstate_node: &NodeRef<'tree, 'on_disk>,
712 ) -> Result<(), DirstateV2ParseError> {
733 ) -> Result<(), DirstateV2ParseError> {
713 if let Some(entry) = dirstate_node.entry()? {
734 if let Some(entry) = dirstate_node.entry()? {
714 if !entry.any_tracked() {
735 if !entry.any_tracked() {
715 // Future-compat for when we start storing ignored and unknown
736 // Future-compat for when we start storing ignored and unknown
716 // files for caching reasons
737 // files for caching reasons
717 return Ok(());
738 return Ok(());
718 }
739 }
719 let path = dirstate_node.full_path(self.dmap.on_disk)?;
740 let path = dirstate_node.full_path(self.dmap.on_disk)?;
720 if self.matcher.matches(path) {
741 if self.matcher.matches(path) {
721 if entry.removed() {
742 if entry.removed() {
722 self.push_outcome(Outcome::Removed, dirstate_node)?
743 self.push_outcome(Outcome::Removed, dirstate_node)?
723 } else {
744 } else {
724 self.push_outcome(Outcome::Deleted, &dirstate_node)?
745 self.push_outcome(Outcome::Deleted, &dirstate_node)?
725 }
746 }
726 }
747 }
727 }
748 }
728 Ok(())
749 Ok(())
729 }
750 }
730
751
731 /// Something in the filesystem has no corresponding dirstate node
752 /// Something in the filesystem has no corresponding dirstate node
732 ///
753 ///
733 /// Returns whether that path is ignored
754 /// Returns whether that path is ignored
734 fn traverse_fs_only(
755 fn traverse_fs_only(
735 &self,
756 &self,
736 has_ignored_ancestor: bool,
757 has_ignored_ancestor: bool,
737 directory_hg_path: &HgPath,
758 directory_hg_path: &HgPath,
738 fs_entry: &DirEntry,
759 fs_entry: &DirEntry,
739 ) -> bool {
760 ) -> bool {
740 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
761 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
741 let file_type = fs_entry.metadata.file_type();
762 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
742 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
763 if fs_entry.is_dir() {
743 if file_type.is_dir() {
744 let is_ignored =
764 let is_ignored =
745 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
765 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
746 let traverse_children = if is_ignored {
766 let traverse_children = if is_ignored {
747 // Descendants of an ignored directory are all ignored
767 // Descendants of an ignored directory are all ignored
748 self.options.list_ignored
768 self.options.list_ignored
749 } else {
769 } else {
750 // Descendants of an unknown directory may be either unknown or
770 // Descendants of an unknown directory may be either unknown or
751 // ignored
771 // ignored
752 self.options.list_unknown || self.options.list_ignored
772 self.options.list_unknown || self.options.list_ignored
753 };
773 };
754 if traverse_children {
774 if traverse_children {
755 let is_at_repo_root = false;
775 let is_at_repo_root = false;
756 if let Ok(children_fs_entries) = self.read_dir(
776 if let Ok(children_fs_entries) =
757 &hg_path,
777 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
758 &fs_entry.fs_path,
778 {
759 is_at_repo_root,
760 ) {
761 children_fs_entries.par_iter().for_each(|child_fs_entry| {
779 children_fs_entries.par_iter().for_each(|child_fs_entry| {
762 self.traverse_fs_only(
780 self.traverse_fs_only(
763 is_ignored,
781 is_ignored,
764 &hg_path,
782 &hg_path,
765 child_fs_entry,
783 child_fs_entry,
766 );
784 );
767 })
785 })
768 }
786 }
769 if self.options.collect_traversed_dirs {
787 if self.options.collect_traversed_dirs {
770 self.outcome.lock().unwrap().traversed.push(hg_path.into())
788 self.outcome.lock().unwrap().traversed.push(hg_path.into())
771 }
789 }
772 }
790 }
773 is_ignored
791 is_ignored
774 } else {
792 } else {
775 if file_or_symlink {
793 if file_or_symlink {
776 if self.matcher.matches(&hg_path) {
794 if self.matcher.matches(&hg_path) {
777 self.mark_unknown_or_ignored(
795 self.mark_unknown_or_ignored(
778 has_ignored_ancestor,
796 has_ignored_ancestor,
779 &BorrowedPath::InMemory(&hg_path),
797 &BorrowedPath::InMemory(&hg_path),
780 )
798 )
781 } else {
799 } else {
782 // We haven’t computed whether this path is ignored. It
800 // We haven’t computed whether this path is ignored. It
783 // might not be, and a future run of status might have a
801 // might not be, and a future run of status might have a
784 // different matcher that matches it. So treat it as not
802 // different matcher that matches it. So treat it as not
785 // ignored. That is, inhibit readdir caching of the parent
803 // ignored. That is, inhibit readdir caching of the parent
786 // directory.
804 // directory.
787 false
805 false
788 }
806 }
789 } else {
807 } else {
790 // This is not a directory, a plain file, or a symlink.
808 // This is not a directory, a plain file, or a symlink.
791 // Treat it like an ignored file.
809 // Treat it like an ignored file.
792 true
810 true
793 }
811 }
794 }
812 }
795 }
813 }
796
814
797 /// Returns whether that path is ignored
815 /// Returns whether that path is ignored
798 fn mark_unknown_or_ignored(
816 fn mark_unknown_or_ignored(
799 &self,
817 &self,
800 has_ignored_ancestor: bool,
818 has_ignored_ancestor: bool,
801 hg_path: &BorrowedPath<'_, 'on_disk>,
819 hg_path: &BorrowedPath<'_, 'on_disk>,
802 ) -> bool {
820 ) -> bool {
803 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
821 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
804 if is_ignored {
822 if is_ignored {
805 if self.options.list_ignored {
823 if self.options.list_ignored {
806 self.push_outcome_without_copy_source(
824 self.push_outcome_without_copy_source(
807 Outcome::Ignored,
825 Outcome::Ignored,
808 hg_path,
826 hg_path,
809 )
827 )
810 }
828 }
811 } else {
829 } else {
812 if self.options.list_unknown {
830 if self.options.list_unknown {
813 self.push_outcome_without_copy_source(
831 self.push_outcome_without_copy_source(
814 Outcome::Unknown,
832 Outcome::Unknown,
815 hg_path,
833 hg_path,
816 )
834 )
817 }
835 }
818 }
836 }
819 is_ignored
837 is_ignored
820 }
838 }
821 }
839 }
822
840
823 struct DirEntry {
841 /// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
824 /// Path as stored in the dirstate
842 /// care about.
825 hg_path: HgPathBuf,
843 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
826 /// Filesystem path
844 enum FakeFileType {
827 fs_path: PathBuf,
845 File,
828 metadata: std::fs::Metadata,
846 Directory,
847 Symlink,
829 }
848 }
830
849
831 impl DirEntry {
850 impl TryFrom<std::fs::FileType> for FakeFileType {
832 /// Returns **unsorted** entries in the given directory, with name and
851 type Error = ();
833 /// metadata.
852
853 fn try_from(f: std::fs::FileType) -> Result<Self, Self::Error> {
854 if f.is_dir() {
855 Ok(Self::Directory)
856 } else if f.is_file() {
857 Ok(Self::File)
858 } else if f.is_symlink() {
859 Ok(Self::Symlink)
860 } else {
861 // Things like FIFO etc.
862 Err(())
863 }
864 }
865 }
866
867 struct DirEntry<'a> {
868 /// Path as stored in the dirstate, or just the filename for optimization.
869 hg_path: HgPathCow<'a>,
870 /// Filesystem path
871 fs_path: Cow<'a, Path>,
872 /// Lazily computed
873 symlink_metadata: Option<std::fs::Metadata>,
874 /// Already computed for ergonomics.
875 file_type: FakeFileType,
876 }
877
878 impl<'a> DirEntry<'a> {
879 /// Returns **unsorted** entries in the given directory, with name,
880 /// metadata and file type.
834 ///
881 ///
835 /// If a `.hg` sub-directory is encountered:
882 /// If a `.hg` sub-directory is encountered:
836 ///
883 ///
837 /// * At the repository root, ignore that sub-directory
884 /// * At the repository root, ignore that sub-directory
838 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
885 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
839 /// list instead.
886 /// list instead.
840 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
887 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
841 // `read_dir` returns a "not found" error for the empty path
888 // `read_dir` returns a "not found" error for the empty path
842 let at_cwd = path == Path::new("");
889 let at_cwd = path == Path::new("");
843 let read_dir_path = if at_cwd { Path::new(".") } else { path };
890 let read_dir_path = if at_cwd { Path::new(".") } else { path };
844 let mut results = Vec::new();
891 let mut results = Vec::new();
845 for entry in read_dir_path.read_dir()? {
892 for entry in read_dir_path.read_dir()? {
846 let entry = entry?;
893 let entry = entry?;
847 let metadata = match entry.metadata() {
894 let file_type = match entry.file_type() {
848 Ok(v) => v,
895 Ok(v) => v,
849 Err(e) => {
896 Err(e) => {
850 // race with file deletion?
897 // race with file deletion?
851 if e.kind() == std::io::ErrorKind::NotFound {
898 if e.kind() == std::io::ErrorKind::NotFound {
852 continue;
899 continue;
853 } else {
900 } else {
854 return Err(e);
901 return Err(e);
855 }
902 }
856 }
903 }
857 };
904 };
858 let file_name = entry.file_name();
905 let file_name = entry.file_name();
859 // FIXME don't do this when cached
906 // FIXME don't do this when cached
860 if file_name == ".hg" {
907 if file_name == ".hg" {
861 if is_at_repo_root {
908 if is_at_repo_root {
862 // Skip the repo’s own .hg (might be a symlink)
909 // Skip the repo’s own .hg (might be a symlink)
863 continue;
910 continue;
864 } else if metadata.is_dir() {
911 } else if file_type.is_dir() {
865 // A .hg sub-directory at another location means a subrepo,
912 // A .hg sub-directory at another location means a subrepo,
866 // skip it entirely.
913 // skip it entirely.
867 return Ok(Vec::new());
914 return Ok(Vec::new());
868 }
915 }
869 }
916 }
870 let full_path = if at_cwd {
917 let full_path = if at_cwd {
871 file_name.clone().into()
918 file_name.clone().into()
872 } else {
919 } else {
873 entry.path()
920 entry.path()
874 };
921 };
875 let base_name = get_bytes_from_os_string(file_name).into();
922 let filename =
923 Cow::Owned(get_bytes_from_os_string(file_name).into());
924 let file_type = match FakeFileType::try_from(file_type) {
925 Ok(file_type) => file_type,
926 Err(_) => continue,
927 };
876 results.push(DirEntry {
928 results.push(DirEntry {
877 hg_path: base_name,
929 hg_path: filename,
878 fs_path: full_path,
930 fs_path: Cow::Owned(full_path.to_path_buf()),
879 metadata,
931 symlink_metadata: None,
932 file_type,
880 })
933 })
881 }
934 }
882 Ok(results)
935 Ok(results)
883 }
936 }
937
938 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
939 match &self.symlink_metadata {
940 Some(meta) => Ok(meta.clone()),
941 None => std::fs::symlink_metadata(&self.fs_path),
942 }
943 }
944
945 fn is_dir(&self) -> bool {
946 self.file_type == FakeFileType::Directory
947 }
948
949 fn is_file(&self) -> bool {
950 self.file_type == FakeFileType::File
951 }
952
953 fn is_symlink(&self) -> bool {
954 self.file_type == FakeFileType::Symlink
955 }
884 }
956 }
885
957
886 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
958 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
887 /// of the given repository.
959 /// of the given repository.
888 ///
960 ///
889 /// This is similar to `SystemTime::now()`, with the result truncated to the
961 /// This is similar to `SystemTime::now()`, with the result truncated to the
890 /// same time resolution as other files’ modification times. Using `.hg`
962 /// same time resolution as other files’ modification times. Using `.hg`
891 /// instead of the system’s default temporary directory (such as `/tmp`) makes
963 /// instead of the system’s default temporary directory (such as `/tmp`) makes
892 /// it more likely the temporary file is in the same disk partition as contents
964 /// it more likely the temporary file is in the same disk partition as contents
893 /// of the working directory, which can matter since different filesystems may
965 /// of the working directory, which can matter since different filesystems may
894 /// store timestamps with different resolutions.
966 /// store timestamps with different resolutions.
895 ///
967 ///
896 /// This may fail, typically if we lack write permissions. In that case we
968 /// This may fail, typically if we lack write permissions. In that case we
897 /// should continue the `status()` algorithm anyway and consider the current
969 /// should continue the `status()` algorithm anyway and consider the current
898 /// date/time to be unknown.
970 /// date/time to be unknown.
899 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
971 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
900 tempfile::tempfile_in(repo_root.join(".hg"))?
972 tempfile::tempfile_in(repo_root.join(".hg"))?
901 .metadata()?
973 .metadata()?
902 .modified()
974 .modified()
903 }
975 }
General Comments 0
You need to be logged in to leave comments. Login now