##// END OF EJS Templates
rust-status: cap the number of concurrent threads to 16...
Raphaël Gomès -
r50215:e2f8ed37 stable
parent child Browse files
Show More
@@ -1,837 +1,848 b''
1 use crate::dirstate::entry::TruncatedTimestamp;
1 use crate::dirstate::entry::TruncatedTimestamp;
2 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate::status::IgnoreFnType;
3 use crate::dirstate::status::StatusPath;
3 use crate::dirstate::status::StatusPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 use crate::dirstate_tree::dirstate_map::NodeData;
7 use crate::dirstate_tree::dirstate_map::NodeData;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 use crate::matchers::get_ignore_function;
10 use crate::matchers::get_ignore_function;
11 use crate::matchers::Matcher;
11 use crate::matchers::Matcher;
12 use crate::utils::files::get_bytes_from_os_string;
12 use crate::utils::files::get_bytes_from_os_string;
13 use crate::utils::files::get_path_from_bytes;
13 use crate::utils::files::get_path_from_bytes;
14 use crate::utils::hg_path::HgPath;
14 use crate::utils::hg_path::HgPath;
15 use crate::BadMatch;
15 use crate::BadMatch;
16 use crate::DirstateStatus;
16 use crate::DirstateStatus;
17 use crate::EntryState;
17 use crate::EntryState;
18 use crate::HgPathBuf;
18 use crate::HgPathBuf;
19 use crate::HgPathCow;
19 use crate::HgPathCow;
20 use crate::PatternFileWarning;
20 use crate::PatternFileWarning;
21 use crate::StatusError;
21 use crate::StatusError;
22 use crate::StatusOptions;
22 use crate::StatusOptions;
23 use micro_timer::timed;
23 use micro_timer::timed;
24 use rayon::prelude::*;
24 use rayon::prelude::*;
25 use sha1::{Digest, Sha1};
25 use sha1::{Digest, Sha1};
26 use std::borrow::Cow;
26 use std::borrow::Cow;
27 use std::io;
27 use std::io;
28 use std::path::Path;
28 use std::path::Path;
29 use std::path::PathBuf;
29 use std::path::PathBuf;
30 use std::sync::Mutex;
30 use std::sync::Mutex;
31 use std::time::SystemTime;
31 use std::time::SystemTime;
32
32
33 /// Returns the status of the working directory compared to its parent
33 /// Returns the status of the working directory compared to its parent
34 /// changeset.
34 /// changeset.
35 ///
35 ///
36 /// This algorithm is based on traversing the filesystem tree (`fs` in function
36 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 /// and variable names) and dirstate tree at the same time. The core of this
37 /// and variable names) and dirstate tree at the same time. The core of this
38 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
38 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 /// and its use of `itertools::merge_join_by`. When reaching a path that only
39 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 /// exists in one of the two trees, depending on information requested by
40 /// exists in one of the two trees, depending on information requested by
41 /// `options` we may need to traverse the remaining subtree.
41 /// `options` we may need to traverse the remaining subtree.
42 #[timed]
42 #[timed]
43 pub fn status<'tree, 'on_disk: 'tree>(
43 pub fn status<'tree, 'on_disk: 'tree>(
44 dmap: &'tree mut DirstateMap<'on_disk>,
44 dmap: &'tree mut DirstateMap<'on_disk>,
45 matcher: &(dyn Matcher + Sync),
45 matcher: &(dyn Matcher + Sync),
46 root_dir: PathBuf,
46 root_dir: PathBuf,
47 ignore_files: Vec<PathBuf>,
47 ignore_files: Vec<PathBuf>,
48 options: StatusOptions,
48 options: StatusOptions,
49 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
49 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
50 // Force the global rayon threadpool to not exceed 16 concurrent threads.
51 // This is a stop-gap measure until we figure out why using more than 16
52 // threads makes `status` slower for each additional thread.
53 // We use `ok()` in case the global threadpool has already been
54 // instantiated in `rhg` or some other caller.
55 // TODO find the underlying cause and fix it, then remove this.
56 rayon::ThreadPoolBuilder::new()
57 .num_threads(16)
58 .build_global()
59 .ok();
60
50 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
61 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
51 if options.list_ignored || options.list_unknown {
62 if options.list_ignored || options.list_unknown {
52 let mut hasher = Sha1::new();
63 let mut hasher = Sha1::new();
53 let (ignore_fn, warnings) = get_ignore_function(
64 let (ignore_fn, warnings) = get_ignore_function(
54 ignore_files,
65 ignore_files,
55 &root_dir,
66 &root_dir,
56 &mut |pattern_bytes| hasher.update(pattern_bytes),
67 &mut |pattern_bytes| hasher.update(pattern_bytes),
57 )?;
68 )?;
58 let new_hash = *hasher.finalize().as_ref();
69 let new_hash = *hasher.finalize().as_ref();
59 let changed = new_hash != dmap.ignore_patterns_hash;
70 let changed = new_hash != dmap.ignore_patterns_hash;
60 dmap.ignore_patterns_hash = new_hash;
71 dmap.ignore_patterns_hash = new_hash;
61 (ignore_fn, warnings, Some(changed))
72 (ignore_fn, warnings, Some(changed))
62 } else {
73 } else {
63 (Box::new(|&_| true), vec![], None)
74 (Box::new(|&_| true), vec![], None)
64 };
75 };
65
76
66 let filesystem_time_at_status_start =
77 let filesystem_time_at_status_start =
67 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
78 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
68
79
69 // If the repository is under the current directory, prefer using a
80 // If the repository is under the current directory, prefer using a
70 // relative path, so the kernel needs to traverse fewer directories in every
81 // relative path, so the kernel needs to traverse fewer directories in every
71 // call to `read_dir` or `symlink_metadata`.
82 // call to `read_dir` or `symlink_metadata`.
72 // This is effective in the common case where the current directory is the
83 // This is effective in the common case where the current directory is the
73 // repository root.
84 // repository root.
74
85
75 // TODO: Better yet would be to use libc functions like `openat` and
86 // TODO: Better yet would be to use libc functions like `openat` and
76 // `fstatat` to remove such repeated traversals entirely, but the standard
87 // `fstatat` to remove such repeated traversals entirely, but the standard
77 // library does not provide APIs based on those.
88 // library does not provide APIs based on those.
78 // Maybe with a crate like https://crates.io/crates/openat instead?
89 // Maybe with a crate like https://crates.io/crates/openat instead?
79 let root_dir = if let Some(relative) = std::env::current_dir()
90 let root_dir = if let Some(relative) = std::env::current_dir()
80 .ok()
91 .ok()
81 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
92 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
82 {
93 {
83 relative
94 relative
84 } else {
95 } else {
85 &root_dir
96 &root_dir
86 };
97 };
87
98
88 let outcome = DirstateStatus {
99 let outcome = DirstateStatus {
89 filesystem_time_at_status_start,
100 filesystem_time_at_status_start,
90 ..Default::default()
101 ..Default::default()
91 };
102 };
92 let common = StatusCommon {
103 let common = StatusCommon {
93 dmap,
104 dmap,
94 options,
105 options,
95 matcher,
106 matcher,
96 ignore_fn,
107 ignore_fn,
97 outcome: Mutex::new(outcome),
108 outcome: Mutex::new(outcome),
98 ignore_patterns_have_changed: patterns_changed,
109 ignore_patterns_have_changed: patterns_changed,
99 new_cachable_directories: Default::default(),
110 new_cachable_directories: Default::default(),
100 outated_cached_directories: Default::default(),
111 outated_cached_directories: Default::default(),
101 filesystem_time_at_status_start,
112 filesystem_time_at_status_start,
102 };
113 };
103 let is_at_repo_root = true;
114 let is_at_repo_root = true;
104 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
115 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
105 let has_ignored_ancestor = false;
116 let has_ignored_ancestor = false;
106 let root_cached_mtime = None;
117 let root_cached_mtime = None;
107 let root_dir_metadata = None;
118 let root_dir_metadata = None;
108 // If the path we have for the repository root is a symlink, do follow it.
119 // If the path we have for the repository root is a symlink, do follow it.
109 // (As opposed to symlinks within the working directory which are not
120 // (As opposed to symlinks within the working directory which are not
110 // followed, using `std::fs::symlink_metadata`.)
121 // followed, using `std::fs::symlink_metadata`.)
111 common.traverse_fs_directory_and_dirstate(
122 common.traverse_fs_directory_and_dirstate(
112 has_ignored_ancestor,
123 has_ignored_ancestor,
113 dmap.root.as_ref(),
124 dmap.root.as_ref(),
114 hg_path,
125 hg_path,
115 &root_dir,
126 &root_dir,
116 root_dir_metadata,
127 root_dir_metadata,
117 root_cached_mtime,
128 root_cached_mtime,
118 is_at_repo_root,
129 is_at_repo_root,
119 )?;
130 )?;
120 let mut outcome = common.outcome.into_inner().unwrap();
131 let mut outcome = common.outcome.into_inner().unwrap();
121 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
132 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
122 let outdated = common.outated_cached_directories.into_inner().unwrap();
133 let outdated = common.outated_cached_directories.into_inner().unwrap();
123
134
124 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
135 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
125 || !outdated.is_empty()
136 || !outdated.is_empty()
126 || !new_cachable.is_empty();
137 || !new_cachable.is_empty();
127
138
128 // Remove outdated mtimes before adding new mtimes, in case a given
139 // Remove outdated mtimes before adding new mtimes, in case a given
129 // directory is both outdated and newly cachable
140 // directory is both outdated and newly cachable
130 for path in &outdated {
141 for path in &outdated {
131 let node = dmap.get_or_insert(path)?;
142 let node = dmap.get_or_insert(path)?;
132 if let NodeData::CachedDirectory { .. } = &node.data {
143 if let NodeData::CachedDirectory { .. } = &node.data {
133 node.data = NodeData::None
144 node.data = NodeData::None
134 }
145 }
135 }
146 }
136 for (path, mtime) in &new_cachable {
147 for (path, mtime) in &new_cachable {
137 let node = dmap.get_or_insert(path)?;
148 let node = dmap.get_or_insert(path)?;
138 match &node.data {
149 match &node.data {
139 NodeData::Entry(_) => {} // Don’t overwrite an entry
150 NodeData::Entry(_) => {} // Don’t overwrite an entry
140 NodeData::CachedDirectory { .. } | NodeData::None => {
151 NodeData::CachedDirectory { .. } | NodeData::None => {
141 node.data = NodeData::CachedDirectory { mtime: *mtime }
152 node.data = NodeData::CachedDirectory { mtime: *mtime }
142 }
153 }
143 }
154 }
144 }
155 }
145
156
146 Ok((outcome, warnings))
157 Ok((outcome, warnings))
147 }
158 }
148
159
149 /// Bag of random things needed by various parts of the algorithm. Reduces the
160 /// Bag of random things needed by various parts of the algorithm. Reduces the
150 /// number of parameters passed to functions.
161 /// number of parameters passed to functions.
151 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
162 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
152 dmap: &'tree DirstateMap<'on_disk>,
163 dmap: &'tree DirstateMap<'on_disk>,
153 options: StatusOptions,
164 options: StatusOptions,
154 matcher: &'a (dyn Matcher + Sync),
165 matcher: &'a (dyn Matcher + Sync),
155 ignore_fn: IgnoreFnType<'a>,
166 ignore_fn: IgnoreFnType<'a>,
156 outcome: Mutex<DirstateStatus<'on_disk>>,
167 outcome: Mutex<DirstateStatus<'on_disk>>,
157 new_cachable_directories:
168 new_cachable_directories:
158 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
169 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
159 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
170 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
160
171
161 /// Whether ignore files like `.hgignore` have changed since the previous
172 /// Whether ignore files like `.hgignore` have changed since the previous
162 /// time a `status()` call wrote their hash to the dirstate. `None` means
173 /// time a `status()` call wrote their hash to the dirstate. `None` means
163 /// we don’t know as this run doesn’t list either ignored or unknown files
174 /// we don’t know as this run doesn’t list either ignored or unknown files
164 /// and therefore isn’t reading `.hgignore`.
175 /// and therefore isn’t reading `.hgignore`.
165 ignore_patterns_have_changed: Option<bool>,
176 ignore_patterns_have_changed: Option<bool>,
166
177
167 /// The current time at the start of the `status()` algorithm, as measured
178 /// The current time at the start of the `status()` algorithm, as measured
168 /// and possibly truncated by the filesystem.
179 /// and possibly truncated by the filesystem.
169 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
180 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
170 }
181 }
171
182
172 enum Outcome {
183 enum Outcome {
173 Modified,
184 Modified,
174 Added,
185 Added,
175 Removed,
186 Removed,
176 Deleted,
187 Deleted,
177 Clean,
188 Clean,
178 Ignored,
189 Ignored,
179 Unknown,
190 Unknown,
180 Unsure,
191 Unsure,
181 }
192 }
182
193
183 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
194 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
184 fn push_outcome(
195 fn push_outcome(
185 &self,
196 &self,
186 which: Outcome,
197 which: Outcome,
187 dirstate_node: &NodeRef<'tree, 'on_disk>,
198 dirstate_node: &NodeRef<'tree, 'on_disk>,
188 ) -> Result<(), DirstateV2ParseError> {
199 ) -> Result<(), DirstateV2ParseError> {
189 let path = dirstate_node
200 let path = dirstate_node
190 .full_path_borrowed(self.dmap.on_disk)?
201 .full_path_borrowed(self.dmap.on_disk)?
191 .detach_from_tree();
202 .detach_from_tree();
192 let copy_source = if self.options.list_copies {
203 let copy_source = if self.options.list_copies {
193 dirstate_node
204 dirstate_node
194 .copy_source_borrowed(self.dmap.on_disk)?
205 .copy_source_borrowed(self.dmap.on_disk)?
195 .map(|source| source.detach_from_tree())
206 .map(|source| source.detach_from_tree())
196 } else {
207 } else {
197 None
208 None
198 };
209 };
199 self.push_outcome_common(which, path, copy_source);
210 self.push_outcome_common(which, path, copy_source);
200 Ok(())
211 Ok(())
201 }
212 }
202
213
203 fn push_outcome_without_copy_source(
214 fn push_outcome_without_copy_source(
204 &self,
215 &self,
205 which: Outcome,
216 which: Outcome,
206 path: &BorrowedPath<'_, 'on_disk>,
217 path: &BorrowedPath<'_, 'on_disk>,
207 ) {
218 ) {
208 self.push_outcome_common(which, path.detach_from_tree(), None)
219 self.push_outcome_common(which, path.detach_from_tree(), None)
209 }
220 }
210
221
211 fn push_outcome_common(
222 fn push_outcome_common(
212 &self,
223 &self,
213 which: Outcome,
224 which: Outcome,
214 path: HgPathCow<'on_disk>,
225 path: HgPathCow<'on_disk>,
215 copy_source: Option<HgPathCow<'on_disk>>,
226 copy_source: Option<HgPathCow<'on_disk>>,
216 ) {
227 ) {
217 let mut outcome = self.outcome.lock().unwrap();
228 let mut outcome = self.outcome.lock().unwrap();
218 let vec = match which {
229 let vec = match which {
219 Outcome::Modified => &mut outcome.modified,
230 Outcome::Modified => &mut outcome.modified,
220 Outcome::Added => &mut outcome.added,
231 Outcome::Added => &mut outcome.added,
221 Outcome::Removed => &mut outcome.removed,
232 Outcome::Removed => &mut outcome.removed,
222 Outcome::Deleted => &mut outcome.deleted,
233 Outcome::Deleted => &mut outcome.deleted,
223 Outcome::Clean => &mut outcome.clean,
234 Outcome::Clean => &mut outcome.clean,
224 Outcome::Ignored => &mut outcome.ignored,
235 Outcome::Ignored => &mut outcome.ignored,
225 Outcome::Unknown => &mut outcome.unknown,
236 Outcome::Unknown => &mut outcome.unknown,
226 Outcome::Unsure => &mut outcome.unsure,
237 Outcome::Unsure => &mut outcome.unsure,
227 };
238 };
228 vec.push(StatusPath { path, copy_source });
239 vec.push(StatusPath { path, copy_source });
229 }
240 }
230
241
231 fn read_dir(
242 fn read_dir(
232 &self,
243 &self,
233 hg_path: &HgPath,
244 hg_path: &HgPath,
234 fs_path: &Path,
245 fs_path: &Path,
235 is_at_repo_root: bool,
246 is_at_repo_root: bool,
236 ) -> Result<Vec<DirEntry>, ()> {
247 ) -> Result<Vec<DirEntry>, ()> {
237 DirEntry::read_dir(fs_path, is_at_repo_root)
248 DirEntry::read_dir(fs_path, is_at_repo_root)
238 .map_err(|error| self.io_error(error, hg_path))
249 .map_err(|error| self.io_error(error, hg_path))
239 }
250 }
240
251
241 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
252 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
242 let errno = error.raw_os_error().expect("expected real OS error");
253 let errno = error.raw_os_error().expect("expected real OS error");
243 self.outcome
254 self.outcome
244 .lock()
255 .lock()
245 .unwrap()
256 .unwrap()
246 .bad
257 .bad
247 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
258 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
248 }
259 }
249
260
250 fn check_for_outdated_directory_cache(
261 fn check_for_outdated_directory_cache(
251 &self,
262 &self,
252 dirstate_node: &NodeRef<'tree, 'on_disk>,
263 dirstate_node: &NodeRef<'tree, 'on_disk>,
253 ) -> Result<(), DirstateV2ParseError> {
264 ) -> Result<(), DirstateV2ParseError> {
254 if self.ignore_patterns_have_changed == Some(true)
265 if self.ignore_patterns_have_changed == Some(true)
255 && dirstate_node.cached_directory_mtime()?.is_some()
266 && dirstate_node.cached_directory_mtime()?.is_some()
256 {
267 {
257 self.outated_cached_directories.lock().unwrap().push(
268 self.outated_cached_directories.lock().unwrap().push(
258 dirstate_node
269 dirstate_node
259 .full_path_borrowed(self.dmap.on_disk)?
270 .full_path_borrowed(self.dmap.on_disk)?
260 .detach_from_tree(),
271 .detach_from_tree(),
261 )
272 )
262 }
273 }
263 Ok(())
274 Ok(())
264 }
275 }
265
276
266 /// If this returns true, we can get accurate results by only using
277 /// If this returns true, we can get accurate results by only using
267 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
278 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
268 /// need to call `read_dir`.
279 /// need to call `read_dir`.
269 fn can_skip_fs_readdir(
280 fn can_skip_fs_readdir(
270 &self,
281 &self,
271 directory_metadata: Option<&std::fs::Metadata>,
282 directory_metadata: Option<&std::fs::Metadata>,
272 cached_directory_mtime: Option<TruncatedTimestamp>,
283 cached_directory_mtime: Option<TruncatedTimestamp>,
273 ) -> bool {
284 ) -> bool {
274 if !self.options.list_unknown && !self.options.list_ignored {
285 if !self.options.list_unknown && !self.options.list_ignored {
275 // All states that we care about listing have corresponding
286 // All states that we care about listing have corresponding
276 // dirstate entries.
287 // dirstate entries.
277 // This happens for example with `hg status -mard`.
288 // This happens for example with `hg status -mard`.
278 return true;
289 return true;
279 }
290 }
280 if !self.options.list_ignored
291 if !self.options.list_ignored
281 && self.ignore_patterns_have_changed == Some(false)
292 && self.ignore_patterns_have_changed == Some(false)
282 {
293 {
283 if let Some(cached_mtime) = cached_directory_mtime {
294 if let Some(cached_mtime) = cached_directory_mtime {
284 // The dirstate contains a cached mtime for this directory, set
295 // The dirstate contains a cached mtime for this directory, set
285 // by a previous run of the `status` algorithm which found this
296 // by a previous run of the `status` algorithm which found this
286 // directory eligible for `read_dir` caching.
297 // directory eligible for `read_dir` caching.
287 if let Some(meta) = directory_metadata {
298 if let Some(meta) = directory_metadata {
288 if cached_mtime
299 if cached_mtime
289 .likely_equal_to_mtime_of(meta)
300 .likely_equal_to_mtime_of(meta)
290 .unwrap_or(false)
301 .unwrap_or(false)
291 {
302 {
292 // The mtime of that directory has not changed
303 // The mtime of that directory has not changed
293 // since then, which means that the results of
304 // since then, which means that the results of
294 // `read_dir` should also be unchanged.
305 // `read_dir` should also be unchanged.
295 return true;
306 return true;
296 }
307 }
297 }
308 }
298 }
309 }
299 }
310 }
300 false
311 false
301 }
312 }
302
313
303 /// Returns whether all child entries of the filesystem directory have a
314 /// Returns whether all child entries of the filesystem directory have a
304 /// corresponding dirstate node or are ignored.
315 /// corresponding dirstate node or are ignored.
305 fn traverse_fs_directory_and_dirstate(
316 fn traverse_fs_directory_and_dirstate(
306 &self,
317 &self,
307 has_ignored_ancestor: bool,
318 has_ignored_ancestor: bool,
308 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
319 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
309 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
320 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
310 directory_fs_path: &Path,
321 directory_fs_path: &Path,
311 directory_metadata: Option<&std::fs::Metadata>,
322 directory_metadata: Option<&std::fs::Metadata>,
312 cached_directory_mtime: Option<TruncatedTimestamp>,
323 cached_directory_mtime: Option<TruncatedTimestamp>,
313 is_at_repo_root: bool,
324 is_at_repo_root: bool,
314 ) -> Result<bool, DirstateV2ParseError> {
325 ) -> Result<bool, DirstateV2ParseError> {
315 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
326 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
316 {
327 {
317 dirstate_nodes
328 dirstate_nodes
318 .par_iter()
329 .par_iter()
319 .map(|dirstate_node| {
330 .map(|dirstate_node| {
320 let fs_path = directory_fs_path.join(get_path_from_bytes(
331 let fs_path = directory_fs_path.join(get_path_from_bytes(
321 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
332 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
322 ));
333 ));
323 match std::fs::symlink_metadata(&fs_path) {
334 match std::fs::symlink_metadata(&fs_path) {
324 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
335 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
325 &fs_path,
336 &fs_path,
326 &fs_metadata,
337 &fs_metadata,
327 dirstate_node,
338 dirstate_node,
328 has_ignored_ancestor,
339 has_ignored_ancestor,
329 ),
340 ),
330 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
341 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
331 self.traverse_dirstate_only(dirstate_node)
342 self.traverse_dirstate_only(dirstate_node)
332 }
343 }
333 Err(error) => {
344 Err(error) => {
334 let hg_path =
345 let hg_path =
335 dirstate_node.full_path(self.dmap.on_disk)?;
346 dirstate_node.full_path(self.dmap.on_disk)?;
336 Ok(self.io_error(error, hg_path))
347 Ok(self.io_error(error, hg_path))
337 }
348 }
338 }
349 }
339 })
350 })
340 .collect::<Result<_, _>>()?;
351 .collect::<Result<_, _>>()?;
341
352
342 // We don’t know, so conservatively say this isn’t the case
353 // We don’t know, so conservatively say this isn’t the case
343 let children_all_have_dirstate_node_or_are_ignored = false;
354 let children_all_have_dirstate_node_or_are_ignored = false;
344
355
345 return Ok(children_all_have_dirstate_node_or_are_ignored);
356 return Ok(children_all_have_dirstate_node_or_are_ignored);
346 }
357 }
347
358
348 let mut fs_entries = if let Ok(entries) = self.read_dir(
359 let mut fs_entries = if let Ok(entries) = self.read_dir(
349 directory_hg_path,
360 directory_hg_path,
350 directory_fs_path,
361 directory_fs_path,
351 is_at_repo_root,
362 is_at_repo_root,
352 ) {
363 ) {
353 entries
364 entries
354 } else {
365 } else {
355 // Treat an unreadable directory (typically because of insufficient
366 // Treat an unreadable directory (typically because of insufficient
356 // permissions) like an empty directory. `self.read_dir` has
367 // permissions) like an empty directory. `self.read_dir` has
357 // already called `self.io_error` so a warning will be emitted.
368 // already called `self.io_error` so a warning will be emitted.
358 Vec::new()
369 Vec::new()
359 };
370 };
360
371
361 // `merge_join_by` requires both its input iterators to be sorted:
372 // `merge_join_by` requires both its input iterators to be sorted:
362
373
363 let dirstate_nodes = dirstate_nodes.sorted();
374 let dirstate_nodes = dirstate_nodes.sorted();
364 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
375 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
365 // https://github.com/rust-lang/rust/issues/34162
376 // https://github.com/rust-lang/rust/issues/34162
366 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
377 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
367
378
368 // Propagate here any error that would happen inside the comparison
379 // Propagate here any error that would happen inside the comparison
369 // callback below
380 // callback below
370 for dirstate_node in &dirstate_nodes {
381 for dirstate_node in &dirstate_nodes {
371 dirstate_node.base_name(self.dmap.on_disk)?;
382 dirstate_node.base_name(self.dmap.on_disk)?;
372 }
383 }
373 itertools::merge_join_by(
384 itertools::merge_join_by(
374 dirstate_nodes,
385 dirstate_nodes,
375 &fs_entries,
386 &fs_entries,
376 |dirstate_node, fs_entry| {
387 |dirstate_node, fs_entry| {
377 // This `unwrap` never panics because we already propagated
388 // This `unwrap` never panics because we already propagated
378 // those errors above
389 // those errors above
379 dirstate_node
390 dirstate_node
380 .base_name(self.dmap.on_disk)
391 .base_name(self.dmap.on_disk)
381 .unwrap()
392 .unwrap()
382 .cmp(&fs_entry.base_name)
393 .cmp(&fs_entry.base_name)
383 },
394 },
384 )
395 )
385 .par_bridge()
396 .par_bridge()
386 .map(|pair| {
397 .map(|pair| {
387 use itertools::EitherOrBoth::*;
398 use itertools::EitherOrBoth::*;
388 let has_dirstate_node_or_is_ignored;
399 let has_dirstate_node_or_is_ignored;
389 match pair {
400 match pair {
390 Both(dirstate_node, fs_entry) => {
401 Both(dirstate_node, fs_entry) => {
391 self.traverse_fs_and_dirstate(
402 self.traverse_fs_and_dirstate(
392 &fs_entry.full_path,
403 &fs_entry.full_path,
393 &fs_entry.metadata,
404 &fs_entry.metadata,
394 dirstate_node,
405 dirstate_node,
395 has_ignored_ancestor,
406 has_ignored_ancestor,
396 )?;
407 )?;
397 has_dirstate_node_or_is_ignored = true
408 has_dirstate_node_or_is_ignored = true
398 }
409 }
399 Left(dirstate_node) => {
410 Left(dirstate_node) => {
400 self.traverse_dirstate_only(dirstate_node)?;
411 self.traverse_dirstate_only(dirstate_node)?;
401 has_dirstate_node_or_is_ignored = true;
412 has_dirstate_node_or_is_ignored = true;
402 }
413 }
403 Right(fs_entry) => {
414 Right(fs_entry) => {
404 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
415 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
405 has_ignored_ancestor,
416 has_ignored_ancestor,
406 directory_hg_path,
417 directory_hg_path,
407 fs_entry,
418 fs_entry,
408 )
419 )
409 }
420 }
410 }
421 }
411 Ok(has_dirstate_node_or_is_ignored)
422 Ok(has_dirstate_node_or_is_ignored)
412 })
423 })
413 .try_reduce(|| true, |a, b| Ok(a && b))
424 .try_reduce(|| true, |a, b| Ok(a && b))
414 }
425 }
415
426
416 fn traverse_fs_and_dirstate(
427 fn traverse_fs_and_dirstate(
417 &self,
428 &self,
418 fs_path: &Path,
429 fs_path: &Path,
419 fs_metadata: &std::fs::Metadata,
430 fs_metadata: &std::fs::Metadata,
420 dirstate_node: NodeRef<'tree, 'on_disk>,
431 dirstate_node: NodeRef<'tree, 'on_disk>,
421 has_ignored_ancestor: bool,
432 has_ignored_ancestor: bool,
422 ) -> Result<(), DirstateV2ParseError> {
433 ) -> Result<(), DirstateV2ParseError> {
423 self.check_for_outdated_directory_cache(&dirstate_node)?;
434 self.check_for_outdated_directory_cache(&dirstate_node)?;
424 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
435 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
425 let file_type = fs_metadata.file_type();
436 let file_type = fs_metadata.file_type();
426 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
437 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
427 if !file_or_symlink {
438 if !file_or_symlink {
428 // If we previously had a file here, it was removed (with
439 // If we previously had a file here, it was removed (with
429 // `hg rm` or similar) or deleted before it could be
440 // `hg rm` or similar) or deleted before it could be
430 // replaced by a directory or something else.
441 // replaced by a directory or something else.
431 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
442 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
432 }
443 }
433 if file_type.is_dir() {
444 if file_type.is_dir() {
434 if self.options.collect_traversed_dirs {
445 if self.options.collect_traversed_dirs {
435 self.outcome
446 self.outcome
436 .lock()
447 .lock()
437 .unwrap()
448 .unwrap()
438 .traversed
449 .traversed
439 .push(hg_path.detach_from_tree())
450 .push(hg_path.detach_from_tree())
440 }
451 }
441 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
452 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
442 let is_at_repo_root = false;
453 let is_at_repo_root = false;
443 let children_all_have_dirstate_node_or_are_ignored = self
454 let children_all_have_dirstate_node_or_are_ignored = self
444 .traverse_fs_directory_and_dirstate(
455 .traverse_fs_directory_and_dirstate(
445 is_ignored,
456 is_ignored,
446 dirstate_node.children(self.dmap.on_disk)?,
457 dirstate_node.children(self.dmap.on_disk)?,
447 hg_path,
458 hg_path,
448 fs_path,
459 fs_path,
449 Some(fs_metadata),
460 Some(fs_metadata),
450 dirstate_node.cached_directory_mtime()?,
461 dirstate_node.cached_directory_mtime()?,
451 is_at_repo_root,
462 is_at_repo_root,
452 )?;
463 )?;
453 self.maybe_save_directory_mtime(
464 self.maybe_save_directory_mtime(
454 children_all_have_dirstate_node_or_are_ignored,
465 children_all_have_dirstate_node_or_are_ignored,
455 fs_metadata,
466 fs_metadata,
456 dirstate_node,
467 dirstate_node,
457 )?
468 )?
458 } else {
469 } else {
459 if file_or_symlink && self.matcher.matches(hg_path) {
470 if file_or_symlink && self.matcher.matches(hg_path) {
460 if let Some(state) = dirstate_node.state()? {
471 if let Some(state) = dirstate_node.state()? {
461 match state {
472 match state {
462 EntryState::Added => {
473 EntryState::Added => {
463 self.push_outcome(Outcome::Added, &dirstate_node)?
474 self.push_outcome(Outcome::Added, &dirstate_node)?
464 }
475 }
465 EntryState::Removed => self
476 EntryState::Removed => self
466 .push_outcome(Outcome::Removed, &dirstate_node)?,
477 .push_outcome(Outcome::Removed, &dirstate_node)?,
467 EntryState::Merged => self
478 EntryState::Merged => self
468 .push_outcome(Outcome::Modified, &dirstate_node)?,
479 .push_outcome(Outcome::Modified, &dirstate_node)?,
469 EntryState::Normal => self
480 EntryState::Normal => self
470 .handle_normal_file(&dirstate_node, fs_metadata)?,
481 .handle_normal_file(&dirstate_node, fs_metadata)?,
471 }
482 }
472 } else {
483 } else {
473 // `node.entry.is_none()` indicates a "directory"
484 // `node.entry.is_none()` indicates a "directory"
474 // node, but the filesystem has a file
485 // node, but the filesystem has a file
475 self.mark_unknown_or_ignored(
486 self.mark_unknown_or_ignored(
476 has_ignored_ancestor,
487 has_ignored_ancestor,
477 hg_path,
488 hg_path,
478 );
489 );
479 }
490 }
480 }
491 }
481
492
482 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
493 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
483 {
494 {
484 self.traverse_dirstate_only(child_node)?
495 self.traverse_dirstate_only(child_node)?
485 }
496 }
486 }
497 }
487 Ok(())
498 Ok(())
488 }
499 }
489
500
490 fn maybe_save_directory_mtime(
501 fn maybe_save_directory_mtime(
491 &self,
502 &self,
492 children_all_have_dirstate_node_or_are_ignored: bool,
503 children_all_have_dirstate_node_or_are_ignored: bool,
493 directory_metadata: &std::fs::Metadata,
504 directory_metadata: &std::fs::Metadata,
494 dirstate_node: NodeRef<'tree, 'on_disk>,
505 dirstate_node: NodeRef<'tree, 'on_disk>,
495 ) -> Result<(), DirstateV2ParseError> {
506 ) -> Result<(), DirstateV2ParseError> {
496 if !children_all_have_dirstate_node_or_are_ignored {
507 if !children_all_have_dirstate_node_or_are_ignored {
497 return Ok(());
508 return Ok(());
498 }
509 }
499 // All filesystem directory entries from `read_dir` have a
510 // All filesystem directory entries from `read_dir` have a
500 // corresponding node in the dirstate, so we can reconstitute the
511 // corresponding node in the dirstate, so we can reconstitute the
501 // names of those entries without calling `read_dir` again.
512 // names of those entries without calling `read_dir` again.
502
513
503 // TODO: use let-else here and below when available:
514 // TODO: use let-else here and below when available:
504 // https://github.com/rust-lang/rust/issues/87335
515 // https://github.com/rust-lang/rust/issues/87335
505 let status_start = if let Some(status_start) =
516 let status_start = if let Some(status_start) =
506 &self.filesystem_time_at_status_start
517 &self.filesystem_time_at_status_start
507 {
518 {
508 status_start
519 status_start
509 } else {
520 } else {
510 return Ok(());
521 return Ok(());
511 };
522 };
512
523
513 // Although the Rust standard library’s `SystemTime` type
524 // Although the Rust standard library’s `SystemTime` type
514 // has nanosecond precision, the times reported for a
525 // has nanosecond precision, the times reported for a
515 // directory’s (or file’s) modified time may have lower
526 // directory’s (or file’s) modified time may have lower
516 // resolution based on the filesystem (for example ext3
527 // resolution based on the filesystem (for example ext3
517 // only stores integer seconds), kernel (see
528 // only stores integer seconds), kernel (see
518 // https://stackoverflow.com/a/14393315/1162888), etc.
529 // https://stackoverflow.com/a/14393315/1162888), etc.
519 let directory_mtime = if let Ok(option) =
530 let directory_mtime = if let Ok(option) =
520 TruncatedTimestamp::for_reliable_mtime_of(
531 TruncatedTimestamp::for_reliable_mtime_of(
521 directory_metadata,
532 directory_metadata,
522 status_start,
533 status_start,
523 ) {
534 ) {
524 if let Some(directory_mtime) = option {
535 if let Some(directory_mtime) = option {
525 directory_mtime
536 directory_mtime
526 } else {
537 } else {
527 // The directory was modified too recently,
538 // The directory was modified too recently,
528 // don’t cache its `read_dir` results.
539 // don’t cache its `read_dir` results.
529 //
540 //
530 // 1. A change to this directory (direct child was
541 // 1. A change to this directory (direct child was
531 // added or removed) causes its mtime to be set
542 // added or removed) causes its mtime to be set
532 // (possibly truncated) to `directory_mtime`
543 // (possibly truncated) to `directory_mtime`
533 // 2. This `status` algorithm calls `read_dir`
544 // 2. This `status` algorithm calls `read_dir`
534 // 3. Another change is made to the same directory,
545 // 3. Another change is made to the same directory,
535 // so that calling `read_dir` again would give
546 // so that calling `read_dir` again would give
536 // different results, but soon enough after 1. that
547 // different results, but soon enough after 1. that
537 // the mtime stays the same
548 // the mtime stays the same
538 //
549 //
539 // On a system where the time resolution is poor, this
550 // On a system where the time resolution is poor, this
540 // scenario is not unlikely if all three steps are caused
551 // scenario is not unlikely if all three steps are caused
541 // by the same script.
552 // by the same script.
542 return Ok(());
553 return Ok(());
543 }
554 }
544 } else {
555 } else {
545 // OS/libc does not support mtime?
556 // OS/libc does not support mtime?
546 return Ok(());
557 return Ok(());
547 };
558 };
548 // We’ve observed (through `status_start`) that time has
559 // We’ve observed (through `status_start`) that time has
549 // “progressed” since `directory_mtime`, so any further
560 // “progressed” since `directory_mtime`, so any further
550 // change to this directory is extremely likely to cause a
561 // change to this directory is extremely likely to cause a
551 // different mtime.
562 // different mtime.
552 //
563 //
553 // Having the same mtime again is not entirely impossible
564 // Having the same mtime again is not entirely impossible
554 // since the system clock is not monotonic. It could jump
565 // since the system clock is not monotonic. It could jump
555 // backward to some point before `directory_mtime`, then a
566 // backward to some point before `directory_mtime`, then a
556 // directory change could potentially happen during exactly
567 // directory change could potentially happen during exactly
557 // the wrong tick.
568 // the wrong tick.
558 //
569 //
559 // We deem this scenario (unlike the previous one) to be
570 // We deem this scenario (unlike the previous one) to be
560 // unlikely enough in practice.
571 // unlikely enough in practice.
561
572
562 let is_up_to_date =
573 let is_up_to_date =
563 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
574 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
564 cached.likely_equal(directory_mtime)
575 cached.likely_equal(directory_mtime)
565 } else {
576 } else {
566 false
577 false
567 };
578 };
568 if !is_up_to_date {
579 if !is_up_to_date {
569 let hg_path = dirstate_node
580 let hg_path = dirstate_node
570 .full_path_borrowed(self.dmap.on_disk)?
581 .full_path_borrowed(self.dmap.on_disk)?
571 .detach_from_tree();
582 .detach_from_tree();
572 self.new_cachable_directories
583 self.new_cachable_directories
573 .lock()
584 .lock()
574 .unwrap()
585 .unwrap()
575 .push((hg_path, directory_mtime))
586 .push((hg_path, directory_mtime))
576 }
587 }
577 Ok(())
588 Ok(())
578 }
589 }
579
590
580 /// A file with `EntryState::Normal` in the dirstate was found in the
591 /// A file with `EntryState::Normal` in the dirstate was found in the
581 /// filesystem
592 /// filesystem
582 fn handle_normal_file(
593 fn handle_normal_file(
583 &self,
594 &self,
584 dirstate_node: &NodeRef<'tree, 'on_disk>,
595 dirstate_node: &NodeRef<'tree, 'on_disk>,
585 fs_metadata: &std::fs::Metadata,
596 fs_metadata: &std::fs::Metadata,
586 ) -> Result<(), DirstateV2ParseError> {
597 ) -> Result<(), DirstateV2ParseError> {
587 // Keep the low 31 bits
598 // Keep the low 31 bits
588 fn truncate_u64(value: u64) -> i32 {
599 fn truncate_u64(value: u64) -> i32 {
589 (value & 0x7FFF_FFFF) as i32
600 (value & 0x7FFF_FFFF) as i32
590 }
601 }
591
602
592 let entry = dirstate_node
603 let entry = dirstate_node
593 .entry()?
604 .entry()?
594 .expect("handle_normal_file called with entry-less node");
605 .expect("handle_normal_file called with entry-less node");
595 let mode_changed =
606 let mode_changed =
596 || self.options.check_exec && entry.mode_changed(fs_metadata);
607 || self.options.check_exec && entry.mode_changed(fs_metadata);
597 let size = entry.size();
608 let size = entry.size();
598 let size_changed = size != truncate_u64(fs_metadata.len());
609 let size_changed = size != truncate_u64(fs_metadata.len());
599 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
610 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
600 // issue6456: Size returned may be longer due to encryption
611 // issue6456: Size returned may be longer due to encryption
601 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
612 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
602 self.push_outcome(Outcome::Unsure, dirstate_node)?
613 self.push_outcome(Outcome::Unsure, dirstate_node)?
603 } else if dirstate_node.has_copy_source()
614 } else if dirstate_node.has_copy_source()
604 || entry.is_from_other_parent()
615 || entry.is_from_other_parent()
605 || (size >= 0 && (size_changed || mode_changed()))
616 || (size >= 0 && (size_changed || mode_changed()))
606 {
617 {
607 self.push_outcome(Outcome::Modified, dirstate_node)?
618 self.push_outcome(Outcome::Modified, dirstate_node)?
608 } else {
619 } else {
609 let mtime_looks_clean;
620 let mtime_looks_clean;
610 if let Some(dirstate_mtime) = entry.truncated_mtime() {
621 if let Some(dirstate_mtime) = entry.truncated_mtime() {
611 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
622 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
612 .expect("OS/libc does not support mtime?");
623 .expect("OS/libc does not support mtime?");
613 // There might be a change in the future if for example the
624 // There might be a change in the future if for example the
614 // internal clock becomes off while the process runs, but this is a
625 // internal clock becomes off while the process runs, but this is a
615 // case where the issues the user would face
626 // case where the issues the user would face
616 // would be a lot worse and there is nothing we
627 // would be a lot worse and there is nothing we
617 // can really do.
628 // can really do.
618 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
629 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
619 } else {
630 } else {
620 // No mtime in the dirstate entry
631 // No mtime in the dirstate entry
621 mtime_looks_clean = false
632 mtime_looks_clean = false
622 };
633 };
623 if !mtime_looks_clean {
634 if !mtime_looks_clean {
624 self.push_outcome(Outcome::Unsure, dirstate_node)?
635 self.push_outcome(Outcome::Unsure, dirstate_node)?
625 } else if self.options.list_clean {
636 } else if self.options.list_clean {
626 self.push_outcome(Outcome::Clean, dirstate_node)?
637 self.push_outcome(Outcome::Clean, dirstate_node)?
627 }
638 }
628 }
639 }
629 Ok(())
640 Ok(())
630 }
641 }
631
642
632 /// A node in the dirstate tree has no corresponding filesystem entry
643 /// A node in the dirstate tree has no corresponding filesystem entry
633 fn traverse_dirstate_only(
644 fn traverse_dirstate_only(
634 &self,
645 &self,
635 dirstate_node: NodeRef<'tree, 'on_disk>,
646 dirstate_node: NodeRef<'tree, 'on_disk>,
636 ) -> Result<(), DirstateV2ParseError> {
647 ) -> Result<(), DirstateV2ParseError> {
637 self.check_for_outdated_directory_cache(&dirstate_node)?;
648 self.check_for_outdated_directory_cache(&dirstate_node)?;
638 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
649 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
639 dirstate_node
650 dirstate_node
640 .children(self.dmap.on_disk)?
651 .children(self.dmap.on_disk)?
641 .par_iter()
652 .par_iter()
642 .map(|child_node| self.traverse_dirstate_only(child_node))
653 .map(|child_node| self.traverse_dirstate_only(child_node))
643 .collect()
654 .collect()
644 }
655 }
645
656
646 /// A node in the dirstate tree has no corresponding *file* on the
657 /// A node in the dirstate tree has no corresponding *file* on the
647 /// filesystem
658 /// filesystem
648 ///
659 ///
649 /// Does nothing on a "directory" node
660 /// Does nothing on a "directory" node
650 fn mark_removed_or_deleted_if_file(
661 fn mark_removed_or_deleted_if_file(
651 &self,
662 &self,
652 dirstate_node: &NodeRef<'tree, 'on_disk>,
663 dirstate_node: &NodeRef<'tree, 'on_disk>,
653 ) -> Result<(), DirstateV2ParseError> {
664 ) -> Result<(), DirstateV2ParseError> {
654 if let Some(state) = dirstate_node.state()? {
665 if let Some(state) = dirstate_node.state()? {
655 let path = dirstate_node.full_path(self.dmap.on_disk)?;
666 let path = dirstate_node.full_path(self.dmap.on_disk)?;
656 if self.matcher.matches(path) {
667 if self.matcher.matches(path) {
657 if let EntryState::Removed = state {
668 if let EntryState::Removed = state {
658 self.push_outcome(Outcome::Removed, dirstate_node)?
669 self.push_outcome(Outcome::Removed, dirstate_node)?
659 } else {
670 } else {
660 self.push_outcome(Outcome::Deleted, &dirstate_node)?
671 self.push_outcome(Outcome::Deleted, &dirstate_node)?
661 }
672 }
662 }
673 }
663 }
674 }
664 Ok(())
675 Ok(())
665 }
676 }
666
677
667 /// Something in the filesystem has no corresponding dirstate node
678 /// Something in the filesystem has no corresponding dirstate node
668 ///
679 ///
669 /// Returns whether that path is ignored
680 /// Returns whether that path is ignored
670 fn traverse_fs_only(
681 fn traverse_fs_only(
671 &self,
682 &self,
672 has_ignored_ancestor: bool,
683 has_ignored_ancestor: bool,
673 directory_hg_path: &HgPath,
684 directory_hg_path: &HgPath,
674 fs_entry: &DirEntry,
685 fs_entry: &DirEntry,
675 ) -> bool {
686 ) -> bool {
676 let hg_path = directory_hg_path.join(&fs_entry.base_name);
687 let hg_path = directory_hg_path.join(&fs_entry.base_name);
677 let file_type = fs_entry.metadata.file_type();
688 let file_type = fs_entry.metadata.file_type();
678 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
689 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
679 if file_type.is_dir() {
690 if file_type.is_dir() {
680 let is_ignored =
691 let is_ignored =
681 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
692 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
682 let traverse_children = if is_ignored {
693 let traverse_children = if is_ignored {
683 // Descendants of an ignored directory are all ignored
694 // Descendants of an ignored directory are all ignored
684 self.options.list_ignored
695 self.options.list_ignored
685 } else {
696 } else {
686 // Descendants of an unknown directory may be either unknown or
697 // Descendants of an unknown directory may be either unknown or
687 // ignored
698 // ignored
688 self.options.list_unknown || self.options.list_ignored
699 self.options.list_unknown || self.options.list_ignored
689 };
700 };
690 if traverse_children {
701 if traverse_children {
691 let is_at_repo_root = false;
702 let is_at_repo_root = false;
692 if let Ok(children_fs_entries) = self.read_dir(
703 if let Ok(children_fs_entries) = self.read_dir(
693 &hg_path,
704 &hg_path,
694 &fs_entry.full_path,
705 &fs_entry.full_path,
695 is_at_repo_root,
706 is_at_repo_root,
696 ) {
707 ) {
697 children_fs_entries.par_iter().for_each(|child_fs_entry| {
708 children_fs_entries.par_iter().for_each(|child_fs_entry| {
698 self.traverse_fs_only(
709 self.traverse_fs_only(
699 is_ignored,
710 is_ignored,
700 &hg_path,
711 &hg_path,
701 child_fs_entry,
712 child_fs_entry,
702 );
713 );
703 })
714 })
704 }
715 }
705 }
716 }
706 if self.options.collect_traversed_dirs {
717 if self.options.collect_traversed_dirs {
707 self.outcome.lock().unwrap().traversed.push(hg_path.into())
718 self.outcome.lock().unwrap().traversed.push(hg_path.into())
708 }
719 }
709 is_ignored
720 is_ignored
710 } else {
721 } else {
711 if file_or_symlink {
722 if file_or_symlink {
712 if self.matcher.matches(&hg_path) {
723 if self.matcher.matches(&hg_path) {
713 self.mark_unknown_or_ignored(
724 self.mark_unknown_or_ignored(
714 has_ignored_ancestor,
725 has_ignored_ancestor,
715 &BorrowedPath::InMemory(&hg_path),
726 &BorrowedPath::InMemory(&hg_path),
716 )
727 )
717 } else {
728 } else {
718 // We haven’t computed whether this path is ignored. It
729 // We haven’t computed whether this path is ignored. It
719 // might not be, and a future run of status might have a
730 // might not be, and a future run of status might have a
720 // different matcher that matches it. So treat it as not
731 // different matcher that matches it. So treat it as not
721 // ignored. That is, inhibit readdir caching of the parent
732 // ignored. That is, inhibit readdir caching of the parent
722 // directory.
733 // directory.
723 false
734 false
724 }
735 }
725 } else {
736 } else {
726 // This is neither a directory, a plain file, nor a symlink.
737 // This is neither a directory, a plain file, nor a symlink.
727 // Treat it like an ignored file.
738 // Treat it like an ignored file.
728 true
739 true
729 }
740 }
730 }
741 }
731 }
742 }
732
743
733 /// Returns whether that path is ignored
744 /// Returns whether that path is ignored
734 fn mark_unknown_or_ignored(
745 fn mark_unknown_or_ignored(
735 &self,
746 &self,
736 has_ignored_ancestor: bool,
747 has_ignored_ancestor: bool,
737 hg_path: &BorrowedPath<'_, 'on_disk>,
748 hg_path: &BorrowedPath<'_, 'on_disk>,
738 ) -> bool {
749 ) -> bool {
739 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
750 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
740 if is_ignored {
751 if is_ignored {
741 if self.options.list_ignored {
752 if self.options.list_ignored {
742 self.push_outcome_without_copy_source(
753 self.push_outcome_without_copy_source(
743 Outcome::Ignored,
754 Outcome::Ignored,
744 hg_path,
755 hg_path,
745 )
756 )
746 }
757 }
747 } else {
758 } else {
748 if self.options.list_unknown {
759 if self.options.list_unknown {
749 self.push_outcome_without_copy_source(
760 self.push_outcome_without_copy_source(
750 Outcome::Unknown,
761 Outcome::Unknown,
751 hg_path,
762 hg_path,
752 )
763 )
753 }
764 }
754 }
765 }
755 is_ignored
766 is_ignored
756 }
767 }
757 }
768 }
758
769
759 struct DirEntry {
770 struct DirEntry {
760 base_name: HgPathBuf,
771 base_name: HgPathBuf,
761 full_path: PathBuf,
772 full_path: PathBuf,
762 metadata: std::fs::Metadata,
773 metadata: std::fs::Metadata,
763 }
774 }
764
775
765 impl DirEntry {
776 impl DirEntry {
766 /// Returns **unsorted** entries in the given directory, with name and
777 /// Returns **unsorted** entries in the given directory, with name and
767 /// metadata.
778 /// metadata.
768 ///
779 ///
769 /// If a `.hg` sub-directory is encountered:
780 /// If a `.hg` sub-directory is encountered:
770 ///
781 ///
771 /// * At the repository root, ignore that sub-directory
782 /// * At the repository root, ignore that sub-directory
772 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
783 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
773 /// list instead.
784 /// list instead.
774 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
785 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
775 // `read_dir` returns a "not found" error for the empty path
786 // `read_dir` returns a "not found" error for the empty path
776 let at_cwd = path == Path::new("");
787 let at_cwd = path == Path::new("");
777 let read_dir_path = if at_cwd { Path::new(".") } else { path };
788 let read_dir_path = if at_cwd { Path::new(".") } else { path };
778 let mut results = Vec::new();
789 let mut results = Vec::new();
779 for entry in read_dir_path.read_dir()? {
790 for entry in read_dir_path.read_dir()? {
780 let entry = entry?;
791 let entry = entry?;
781 let metadata = match entry.metadata() {
792 let metadata = match entry.metadata() {
782 Ok(v) => v,
793 Ok(v) => v,
783 Err(e) => {
794 Err(e) => {
784 // race with file deletion?
795 // race with file deletion?
785 if e.kind() == std::io::ErrorKind::NotFound {
796 if e.kind() == std::io::ErrorKind::NotFound {
786 continue;
797 continue;
787 } else {
798 } else {
788 return Err(e);
799 return Err(e);
789 }
800 }
790 }
801 }
791 };
802 };
792 let file_name = entry.file_name();
803 let file_name = entry.file_name();
793 // FIXME don't do this when cached
804 // FIXME don't do this when cached
794 if file_name == ".hg" {
805 if file_name == ".hg" {
795 if is_at_repo_root {
806 if is_at_repo_root {
796 // Skip the repo’s own .hg (might be a symlink)
807 // Skip the repo’s own .hg (might be a symlink)
797 continue;
808 continue;
798 } else if metadata.is_dir() {
809 } else if metadata.is_dir() {
799 // A .hg sub-directory at another location means a subrepo,
810 // A .hg sub-directory at another location means a subrepo,
800 // skip it entirely.
811 // skip it entirely.
801 return Ok(Vec::new());
812 return Ok(Vec::new());
802 }
813 }
803 }
814 }
804 let full_path = if at_cwd {
815 let full_path = if at_cwd {
805 file_name.clone().into()
816 file_name.clone().into()
806 } else {
817 } else {
807 entry.path()
818 entry.path()
808 };
819 };
809 let base_name = get_bytes_from_os_string(file_name).into();
820 let base_name = get_bytes_from_os_string(file_name).into();
810 results.push(DirEntry {
821 results.push(DirEntry {
811 base_name,
822 base_name,
812 full_path,
823 full_path,
813 metadata,
824 metadata,
814 })
825 })
815 }
826 }
816 Ok(results)
827 Ok(results)
817 }
828 }
818 }
829 }
819
830
820 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
831 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
821 /// of the given repository.
832 /// of the given repository.
822 ///
833 ///
823 /// This is similar to `SystemTime::now()`, with the result truncated to the
834 /// This is similar to `SystemTime::now()`, with the result truncated to the
824 /// same time resolution as other files’ modification times. Using `.hg`
835 /// same time resolution as other files’ modification times. Using `.hg`
825 /// instead of the system’s default temporary directory (such as `/tmp`) makes
836 /// instead of the system’s default temporary directory (such as `/tmp`) makes
826 /// it more likely the temporary file is in the same disk partition as contents
837 /// it more likely the temporary file is in the same disk partition as contents
827 /// of the working directory, which can matter since different filesystems may
838 /// of the working directory, which can matter since different filesystems may
828 /// store timestamps with different resolutions.
839 /// store timestamps with different resolutions.
829 ///
840 ///
830 /// This may fail, typically if we lack write permissions. In that case we
841 /// This may fail, typically if we lack write permissions. In that case we
831 /// should continue the `status()` algorithm anyway and consider the current
842 /// should continue the `status()` algorithm anyway and consider the current
832 /// date/time to be unknown.
843 /// date/time to be unknown.
833 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
844 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
834 tempfile::tempfile_in(repo_root.join(".hg"))?
845 tempfile::tempfile_in(repo_root.join(".hg"))?
835 .metadata()?
846 .metadata()?
836 .modified()
847 .modified()
837 }
848 }
General Comments 0
You need to be logged in to leave comments. Login now