##// END OF EJS Templates
rust-status: explicitly track bad file types...
Spencer Baugh -
r51755:5efccea9 default
parent child Browse files
Show More
@@ -1,149 +1,149 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust implementation of dirstate.status (dirstate.py).
9 9 //! It is currently missing a lot of functionality compared to the Python one
10 10 //! and will only be triggered in narrow cases.
11 11
12 12 use crate::dirstate::entry::TruncatedTimestamp;
13 13 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
14 14 use crate::{
15 15 utils::hg_path::{HgPath, HgPathError},
16 16 PatternError,
17 17 };
18 18
19 19 use std::{borrow::Cow, fmt};
20 20
/// Wrong type of file from a `BadMatch`.
///
/// Names the kind of filesystem object that was found where a regular file
/// or symlink was expected.
/// Note: a lot of those don't exist on all platforms.
///
/// `PartialEq`/`Eq` are derived so that values can be compared directly.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BadType {
    CharacterDevice,
    BlockDevice,
    FIFO,
    Socket,
    Directory,
    /// Any type that is not one of the variants above.
    Unknown,
}
32 32
33 33 impl fmt::Display for BadType {
34 34 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
35 35 f.write_str(match self {
36 36 BadType::CharacterDevice => "character device",
37 37 BadType::BlockDevice => "block device",
38 38 BadType::FIFO => "fifo",
39 39 BadType::Socket => "socket",
40 40 BadType::Directory => "directory",
41 41 BadType::Unknown => "unknown",
42 42 })
43 43 }
44 44 }
45 45
46 46 /// Was explicitly matched but cannot be found/accessed
47 47 #[derive(Debug, Copy, Clone)]
48 48 pub enum BadMatch {
49 49 OsError(i32),
50 50 BadType(BadType),
51 51 }
52 52
53 53 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait + 'static>`, so add
54 54 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
55 55 pub type IgnoreFnType<'a> =
56 56 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
57 57
58 58 /// We have a good mix of owned (from directory traversal) and borrowed (from
59 59 /// the dirstate/explicit) paths, this comes up a lot.
60 60 pub type HgPathCow<'a> = Cow<'a, HgPath>;
61 61
/// Flags selecting what a status computation checks and which categories of
/// results it collects.
#[derive(Debug, Copy, Clone)]
pub struct StatusOptions {
    /// Whether we are on a filesystem with UNIX-like exec flags
    pub check_exec: bool,
    /// Whether to populate `DirstateStatus::clean`
    pub list_clean: bool,
    /// Whether to populate `DirstateStatus::unknown`
    pub list_unknown: bool,
    /// Whether to populate `DirstateStatus::ignored`
    pub list_ignored: bool,
    /// Whether to populate `StatusPath::copy_source`
    pub list_copies: bool,
    /// Whether to collect traversed dirs for applying a callback later.
    /// Used by `hg purge` for example.
    pub collect_traversed_dirs: bool,
}
75 75
76 76 #[derive(Default)]
77 77 pub struct DirstateStatus<'a> {
78 78 /// The current time at the start of the `status()` algorithm, as measured
79 79 /// and possibly truncated by the filesystem.
80 80 pub filesystem_time_at_status_start: Option<TruncatedTimestamp>,
81 81
82 82 /// Tracked files whose contents have changed since the parent revision
83 83 pub modified: Vec<StatusPath<'a>>,
84 84
85 85 /// Newly-tracked files that were not present in the parent
86 86 pub added: Vec<StatusPath<'a>>,
87 87
88 88 /// Previously-tracked files that have been (re)moved with an hg command
89 89 pub removed: Vec<StatusPath<'a>>,
90 90
91 91 /// (Still) tracked files that are missing, (re)moved with an non-hg
92 92 /// command
93 93 pub deleted: Vec<StatusPath<'a>>,
94 94
95 95 /// Tracked files that are up to date with the parent.
96 96 /// Only pupulated if `StatusOptions::list_clean` is true.
97 97 pub clean: Vec<StatusPath<'a>>,
98 98
99 99 /// Files in the working directory that are ignored with `.hgignore`.
100 100 /// Only pupulated if `StatusOptions::list_ignored` is true.
101 101 pub ignored: Vec<StatusPath<'a>>,
102 102
103 103 /// Files in the working directory that are neither tracked nor ignored.
104 104 /// Only pupulated if `StatusOptions::list_unknown` is true.
105 105 pub unknown: Vec<StatusPath<'a>>,
106 106
107 107 /// Was explicitly matched but cannot be found/accessed
108 108 pub bad: Vec<(HgPathCow<'a>, BadMatch)>,
109 109
110 110 /// Either clean or modified, but we can’t tell from filesystem metadata
111 111 /// alone. The file contents need to be read and compared with that in
112 112 /// the parent.
113 113 pub unsure: Vec<StatusPath<'a>>,
114 114
115 115 /// Only filled if `collect_traversed_dirs` is `true`
116 116 pub traversed: Vec<HgPathCow<'a>>,
117 117
118 118 /// Whether `status()` made changed to the `DirstateMap` that should be
119 119 /// written back to disk
120 120 pub dirty: bool,
121 121 }
122 122
123 123 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
124 124 pub struct StatusPath<'a> {
125 125 pub path: HgPathCow<'a>,
126 126 pub copy_source: Option<HgPathCow<'a>>,
127 127 }
128 128
129 129 #[derive(Debug, derive_more::From)]
130 130 pub enum StatusError {
131 131 /// An invalid path that cannot be represented in Mercurial was found
132 132 Path(HgPathError),
133 133 /// An invalid "ignore" pattern was found
134 134 Pattern(PatternError),
135 135 /// Corrupted dirstate
136 136 DirstateV2ParseError(DirstateV2ParseError),
137 137 }
138 138
139 139 impl fmt::Display for StatusError {
140 140 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
141 141 match self {
142 142 StatusError::Path(error) => error.fmt(f),
143 143 StatusError::Pattern(error) => error.fmt(f),
144 144 StatusError::DirstateV2ParseError(_) => {
145 145 f.write_str("dirstate-v2 parse error")
146 146 }
147 147 }
148 148 }
149 149 }
@@ -1,996 +1,1013 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::Matcher;
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_bytes_from_path;
14 14 use crate::utils::files::get_path_from_bytes;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::BadMatch;
17 use crate::BadType;
17 18 use crate::DirstateStatus;
18 19 use crate::HgPathCow;
19 20 use crate::PatternFileWarning;
20 21 use crate::StatusError;
21 22 use crate::StatusOptions;
22 23 use once_cell::sync::OnceCell;
23 24 use rayon::prelude::*;
24 25 use sha1::{Digest, Sha1};
25 26 use std::borrow::Cow;
26 27 use std::io;
28 use std::os::unix::prelude::FileTypeExt;
27 29 use std::path::Path;
28 30 use std::path::PathBuf;
29 31 use std::sync::Mutex;
30 32 use std::time::SystemTime;
31 33
32 34 /// Returns the status of the working directory compared to its parent
33 35 /// changeset.
34 36 ///
35 37 /// This algorithm is based on traversing the filesystem tree (`fs` in function
36 38 /// and variable names) and dirstate tree at the same time. The core of this
37 39 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
38 40 /// and its use of `itertools::merge_join_by`. When reaching a path that only
39 41 /// exists in one of the two trees, depending on information requested by
40 42 /// `options` we may need to traverse the remaining subtree.
41 43 #[logging_timer::time("trace")]
42 44 pub fn status<'dirstate>(
43 45 dmap: &'dirstate mut DirstateMap,
44 46 matcher: &(dyn Matcher + Sync),
45 47 root_dir: PathBuf,
46 48 ignore_files: Vec<PathBuf>,
47 49 options: StatusOptions,
48 50 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
49 51 {
50 52 // Also cap for a Python caller of this function, but don't complain if
51 53 // the global threadpool has already been set since this code path is also
52 54 // being used by `rhg`, which calls this early.
53 55 let _ = crate::utils::cap_default_rayon_threads();
54 56
55 57 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
56 58 if options.list_ignored || options.list_unknown {
57 59 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
58 60 DirstateVersion::V1 => {
59 61 let (ignore_fn, warnings) = get_ignore_function(
60 62 ignore_files,
61 63 &root_dir,
62 64 &mut |_source, _pattern_bytes| {},
63 65 )?;
64 66 (ignore_fn, warnings, None)
65 67 }
66 68 DirstateVersion::V2 => {
67 69 let mut hasher = Sha1::new();
68 70 let (ignore_fn, warnings) = get_ignore_function(
69 71 ignore_files,
70 72 &root_dir,
71 73 &mut |source, pattern_bytes| {
72 74 // If inside the repo, use the relative version to
73 75 // make it deterministic inside tests.
74 76 // The performance hit should be negligible.
75 77 let source = source
76 78 .strip_prefix(&root_dir)
77 79 .unwrap_or(source);
78 80 let source = get_bytes_from_path(source);
79 81
80 82 let mut subhasher = Sha1::new();
81 83 subhasher.update(pattern_bytes);
82 84 let patterns_hash = subhasher.finalize();
83 85
84 86 hasher.update(source);
85 87 hasher.update(b" ");
86 88 hasher.update(patterns_hash);
87 89 hasher.update(b"\n");
88 90 },
89 91 )?;
90 92 let new_hash = *hasher.finalize().as_ref();
91 93 let changed = new_hash != dmap.ignore_patterns_hash;
92 94 dmap.ignore_patterns_hash = new_hash;
93 95 (ignore_fn, warnings, Some(changed))
94 96 }
95 97 };
96 98 (ignore_fn, warnings, changed)
97 99 } else {
98 100 (Box::new(|&_| true), vec![], None)
99 101 };
100 102
101 103 let filesystem_time_at_status_start =
102 104 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
103 105
104 106 // If the repository is under the current directory, prefer using a
105 107 // relative path, so the kernel needs to traverse fewer directory in every
106 108 // call to `read_dir` or `symlink_metadata`.
107 109 // This is effective in the common case where the current directory is the
108 110 // repository root.
109 111
110 112 // TODO: Better yet would be to use libc functions like `openat` and
111 113 // `fstatat` to remove such repeated traversals entirely, but the standard
112 114 // library does not provide APIs based on those.
113 115 // Maybe with a crate like https://crates.io/crates/openat instead?
114 116 let root_dir = if let Some(relative) = std::env::current_dir()
115 117 .ok()
116 118 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
117 119 {
118 120 relative
119 121 } else {
120 122 &root_dir
121 123 };
122 124
123 125 let outcome = DirstateStatus {
124 126 filesystem_time_at_status_start,
125 127 ..Default::default()
126 128 };
127 129 let common = StatusCommon {
128 130 dmap,
129 131 options,
130 132 matcher,
131 133 ignore_fn,
132 134 outcome: Mutex::new(outcome),
133 135 ignore_patterns_have_changed: patterns_changed,
134 136 new_cacheable_directories: Default::default(),
135 137 outdated_cached_directories: Default::default(),
136 138 filesystem_time_at_status_start,
137 139 };
138 140 let is_at_repo_root = true;
139 141 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
140 142 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
141 143 let root_cached_mtime = None;
142 144 // If the path we have for the repository root is a symlink, do follow it.
143 145 // (As opposed to symlinks within the working directory which are not
144 146 // followed, using `std::fs::symlink_metadata`.)
145 147 common.traverse_fs_directory_and_dirstate(
146 148 &has_ignored_ancestor,
147 149 dmap.root.as_ref(),
148 150 hg_path,
149 151 &DirEntry {
150 152 hg_path: Cow::Borrowed(HgPath::new(b"")),
151 153 fs_path: Cow::Borrowed(root_dir),
152 154 symlink_metadata: None,
153 155 file_type: FakeFileType::Directory,
154 156 },
155 157 root_cached_mtime,
156 158 is_at_repo_root,
157 159 )?;
158 160 let mut outcome = common.outcome.into_inner().unwrap();
159 161 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
160 162 let outdated = common.outdated_cached_directories.into_inner().unwrap();
161 163
162 164 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
163 165 || !outdated.is_empty()
164 166 || (!new_cacheable.is_empty()
165 167 && dmap.dirstate_version == DirstateVersion::V2);
166 168
167 169 // Remove outdated mtimes before adding new mtimes, in case a given
168 170 // directory is both
169 171 for path in &outdated {
170 172 dmap.clear_cached_mtime(path)?;
171 173 }
172 174 for (path, mtime) in &new_cacheable {
173 175 dmap.set_cached_mtime(path, *mtime)?;
174 176 }
175 177
176 178 Ok((outcome, warnings))
177 179 }
178 180
179 181 /// Bag of random things needed by various parts of the algorithm. Reduces the
180 182 /// number of parameters passed to functions.
181 183 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
182 184 dmap: &'tree DirstateMap<'on_disk>,
183 185 options: StatusOptions,
184 186 matcher: &'a (dyn Matcher + Sync),
185 187 ignore_fn: IgnoreFnType<'a>,
186 188 outcome: Mutex<DirstateStatus<'on_disk>>,
187 189 /// New timestamps of directories to be used for caching their readdirs
188 190 new_cacheable_directories:
189 191 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
190 192 /// Used to invalidate the readdir cache of directories
191 193 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
192 194
193 195 /// Whether ignore files like `.hgignore` have changed since the previous
194 196 /// time a `status()` call wrote their hash to the dirstate. `None` means
195 197 /// we don’t know as this run doesn’t list either ignored or uknown files
196 198 /// and therefore isn’t reading `.hgignore`.
197 199 ignore_patterns_have_changed: Option<bool>,
198 200
199 201 /// The current time at the start of the `status()` algorithm, as measured
200 202 /// and possibly truncated by the filesystem.
201 203 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
202 204 }
203 205
/// Selects which list of `DirstateStatus` a path gets appended to; each
/// variant corresponds to the field of the same (lowercased) name.
enum Outcome {
    Modified,
    Added,
    Removed,
    Deleted,
    Clean,
    Ignored,
    Unknown,
    Unsure,
}
214 216
215 217 /// Lazy computation of whether a given path has a hgignored
216 218 /// ancestor.
217 219 struct HasIgnoredAncestor<'a> {
218 220 /// `path` and `parent` constitute the inputs to the computation,
219 221 /// `cache` stores the outcome.
220 222 path: &'a HgPath,
221 223 parent: Option<&'a HasIgnoredAncestor<'a>>,
222 224 cache: OnceCell<bool>,
223 225 }
224 226
225 227 impl<'a> HasIgnoredAncestor<'a> {
226 228 fn create(
227 229 parent: Option<&'a HasIgnoredAncestor<'a>>,
228 230 path: &'a HgPath,
229 231 ) -> HasIgnoredAncestor<'a> {
230 232 Self {
231 233 path,
232 234 parent,
233 235 cache: OnceCell::new(),
234 236 }
235 237 }
236 238
237 239 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
238 240 match self.parent {
239 241 None => false,
240 242 Some(parent) => {
241 243 *(self.cache.get_or_init(|| {
242 244 parent.force(ignore_fn) || ignore_fn(self.path)
243 245 }))
244 246 }
245 247 }
246 248 }
247 249 }
248 250
249 251 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
250 252 fn push_outcome(
251 253 &self,
252 254 which: Outcome,
253 255 dirstate_node: &NodeRef<'tree, 'on_disk>,
254 256 ) -> Result<(), DirstateV2ParseError> {
255 257 let path = dirstate_node
256 258 .full_path_borrowed(self.dmap.on_disk)?
257 259 .detach_from_tree();
258 260 let copy_source = if self.options.list_copies {
259 261 dirstate_node
260 262 .copy_source_borrowed(self.dmap.on_disk)?
261 263 .map(|source| source.detach_from_tree())
262 264 } else {
263 265 None
264 266 };
265 267 self.push_outcome_common(which, path, copy_source);
266 268 Ok(())
267 269 }
268 270
269 271 fn push_outcome_without_copy_source(
270 272 &self,
271 273 which: Outcome,
272 274 path: &BorrowedPath<'_, 'on_disk>,
273 275 ) {
274 276 self.push_outcome_common(which, path.detach_from_tree(), None)
275 277 }
276 278
277 279 fn push_outcome_common(
278 280 &self,
279 281 which: Outcome,
280 282 path: HgPathCow<'on_disk>,
281 283 copy_source: Option<HgPathCow<'on_disk>>,
282 284 ) {
283 285 let mut outcome = self.outcome.lock().unwrap();
284 286 let vec = match which {
285 287 Outcome::Modified => &mut outcome.modified,
286 288 Outcome::Added => &mut outcome.added,
287 289 Outcome::Removed => &mut outcome.removed,
288 290 Outcome::Deleted => &mut outcome.deleted,
289 291 Outcome::Clean => &mut outcome.clean,
290 292 Outcome::Ignored => &mut outcome.ignored,
291 293 Outcome::Unknown => &mut outcome.unknown,
292 294 Outcome::Unsure => &mut outcome.unsure,
293 295 };
294 296 vec.push(StatusPath { path, copy_source });
295 297 }
296 298
297 299 fn read_dir(
298 300 &self,
299 301 hg_path: &HgPath,
300 302 fs_path: &Path,
301 303 is_at_repo_root: bool,
302 304 ) -> Result<Vec<DirEntry>, ()> {
303 305 DirEntry::read_dir(fs_path, is_at_repo_root)
304 306 .map_err(|error| self.io_error(error, hg_path))
305 307 }
306 308
307 309 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
308 310 let errno = error.raw_os_error().expect("expected real OS error");
309 311 self.outcome
310 312 .lock()
311 313 .unwrap()
312 314 .bad
313 315 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
314 316 }
315 317
316 318 fn check_for_outdated_directory_cache(
317 319 &self,
318 320 dirstate_node: &NodeRef<'tree, 'on_disk>,
319 321 ) -> Result<bool, DirstateV2ParseError> {
320 322 if self.ignore_patterns_have_changed == Some(true)
321 323 && dirstate_node.cached_directory_mtime()?.is_some()
322 324 {
323 325 self.outdated_cached_directories.lock().unwrap().push(
324 326 dirstate_node
325 327 .full_path_borrowed(self.dmap.on_disk)?
326 328 .detach_from_tree(),
327 329 );
328 330 return Ok(true);
329 331 }
330 332 Ok(false)
331 333 }
332 334
333 335 /// If this returns true, we can get accurate results by only using
334 336 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
335 337 /// need to call `read_dir`.
336 338 fn can_skip_fs_readdir(
337 339 &self,
338 340 directory_entry: &DirEntry,
339 341 cached_directory_mtime: Option<TruncatedTimestamp>,
340 342 ) -> bool {
341 343 if !self.options.list_unknown && !self.options.list_ignored {
342 344 // All states that we care about listing have corresponding
343 345 // dirstate entries.
344 346 // This happens for example with `hg status -mard`.
345 347 return true;
346 348 }
347 349 if !self.options.list_ignored
348 350 && self.ignore_patterns_have_changed == Some(false)
349 351 {
350 352 if let Some(cached_mtime) = cached_directory_mtime {
351 353 // The dirstate contains a cached mtime for this directory, set
352 354 // by a previous run of the `status` algorithm which found this
353 355 // directory eligible for `read_dir` caching.
354 356 if let Ok(meta) = directory_entry.symlink_metadata() {
355 357 if cached_mtime
356 358 .likely_equal_to_mtime_of(&meta)
357 359 .unwrap_or(false)
358 360 {
359 361 // The mtime of that directory has not changed
360 362 // since then, which means that the results of
361 363 // `read_dir` should also be unchanged.
362 364 return true;
363 365 }
364 366 }
365 367 }
366 368 }
367 369 false
368 370 }
369 371
370 372 /// Returns whether all child entries of the filesystem directory have a
371 373 /// corresponding dirstate node or are ignored.
372 374 fn traverse_fs_directory_and_dirstate<'ancestor>(
373 375 &self,
374 376 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
375 377 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
376 378 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
377 379 directory_entry: &DirEntry,
378 380 cached_directory_mtime: Option<TruncatedTimestamp>,
379 381 is_at_repo_root: bool,
380 382 ) -> Result<bool, DirstateV2ParseError> {
381 383 if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
382 384 dirstate_nodes
383 385 .par_iter()
384 386 .map(|dirstate_node| {
385 387 let fs_path = &directory_entry.fs_path;
386 388 let fs_path = fs_path.join(get_path_from_bytes(
387 389 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
388 390 ));
389 391 match std::fs::symlink_metadata(&fs_path) {
390 392 Ok(fs_metadata) => {
391 let file_type =
392 match fs_metadata.file_type().try_into() {
393 Ok(file_type) => file_type,
394 Err(_) => return Ok(()),
395 };
393 let file_type = fs_metadata.file_type().into();
396 394 let entry = DirEntry {
397 395 hg_path: Cow::Borrowed(
398 396 dirstate_node
399 397 .full_path(self.dmap.on_disk)?,
400 398 ),
401 399 fs_path: Cow::Borrowed(&fs_path),
402 400 symlink_metadata: Some(fs_metadata),
403 401 file_type,
404 402 };
405 403 self.traverse_fs_and_dirstate(
406 404 &entry,
407 405 dirstate_node,
408 406 has_ignored_ancestor,
409 407 )
410 408 }
411 409 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
412 410 self.traverse_dirstate_only(dirstate_node)
413 411 }
414 412 Err(error) => {
415 413 let hg_path =
416 414 dirstate_node.full_path(self.dmap.on_disk)?;
417 415 self.io_error(error, hg_path);
418 416 Ok(())
419 417 }
420 418 }
421 419 })
422 420 .collect::<Result<_, _>>()?;
423 421
424 422 // We don’t know, so conservatively say this isn’t the case
425 423 let children_all_have_dirstate_node_or_are_ignored = false;
426 424
427 425 return Ok(children_all_have_dirstate_node_or_are_ignored);
428 426 }
429 427
430 428 let readdir_succeeded;
431 429 let mut fs_entries = if let Ok(entries) = self.read_dir(
432 430 directory_hg_path,
433 431 &directory_entry.fs_path,
434 432 is_at_repo_root,
435 433 ) {
436 434 readdir_succeeded = true;
437 435 entries
438 436 } else {
439 437 // Treat an unreadable directory (typically because of insufficient
440 438 // permissions) like an empty directory. `self.read_dir` has
441 439 // already called `self.io_error` so a warning will be emitted.
442 440 // We still need to remember that there was an error so that we
443 441 // know not to cache this result.
444 442 readdir_succeeded = false;
445 443 Vec::new()
446 444 };
447 445
448 446 // `merge_join_by` requires both its input iterators to be sorted:
449 447
450 448 let dirstate_nodes = dirstate_nodes.sorted();
451 449 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
452 450 // https://github.com/rust-lang/rust/issues/34162
453 451 fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
454 452
455 453 // Propagate here any error that would happen inside the comparison
456 454 // callback below
457 455 for dirstate_node in &dirstate_nodes {
458 456 dirstate_node.base_name(self.dmap.on_disk)?;
459 457 }
460 458 itertools::merge_join_by(
461 459 dirstate_nodes,
462 460 &fs_entries,
463 461 |dirstate_node, fs_entry| {
464 462 // This `unwrap` never panics because we already propagated
465 463 // those errors above
466 464 dirstate_node
467 465 .base_name(self.dmap.on_disk)
468 466 .unwrap()
469 467 .cmp(&fs_entry.hg_path)
470 468 },
471 469 )
472 470 .par_bridge()
473 471 .map(|pair| {
474 472 use itertools::EitherOrBoth::*;
475 473 let has_dirstate_node_or_is_ignored = match pair {
476 474 Both(dirstate_node, fs_entry) => {
477 475 self.traverse_fs_and_dirstate(
478 476 fs_entry,
479 477 dirstate_node,
480 478 has_ignored_ancestor,
481 479 )?;
482 480 true
483 481 }
484 482 Left(dirstate_node) => {
485 483 self.traverse_dirstate_only(dirstate_node)?;
486 484 true
487 485 }
488 486 Right(fs_entry) => self.traverse_fs_only(
489 487 has_ignored_ancestor.force(&self.ignore_fn),
490 488 directory_hg_path,
491 489 fs_entry,
492 490 ),
493 491 };
494 492 Ok(has_dirstate_node_or_is_ignored)
495 493 })
496 494 .try_reduce(|| true, |a, b| Ok(a && b))
497 495 .map(|res| res && readdir_succeeded)
498 496 }
499 497
500 498 fn traverse_fs_and_dirstate<'ancestor>(
501 499 &self,
502 500 fs_entry: &DirEntry,
503 501 dirstate_node: NodeRef<'tree, 'on_disk>,
504 502 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
505 503 ) -> Result<(), DirstateV2ParseError> {
506 504 let outdated_dircache =
507 505 self.check_for_outdated_directory_cache(&dirstate_node)?;
508 506 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
509 507 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
510 508 if !file_or_symlink {
511 509 // If we previously had a file here, it was removed (with
512 510 // `hg rm` or similar) or deleted before it could be
513 511 // replaced by a directory or something else.
514 512 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
515 513 }
514 if let Some(bad_type) = fs_entry.is_bad() {
515 if self.matcher.exact_match(hg_path) {
516 let path = dirstate_node.full_path(self.dmap.on_disk)?;
517 self.outcome.lock().unwrap().bad.push((
518 path.to_owned().into(),
519 BadMatch::BadType(bad_type),
520 ))
521 }
522 }
516 523 if fs_entry.is_dir() {
517 524 if self.options.collect_traversed_dirs {
518 525 self.outcome
519 526 .lock()
520 527 .unwrap()
521 528 .traversed
522 529 .push(hg_path.detach_from_tree())
523 530 }
524 531 let is_ignored = HasIgnoredAncestor::create(
525 532 Some(has_ignored_ancestor),
526 533 hg_path,
527 534 );
528 535 let is_at_repo_root = false;
529 536 let children_all_have_dirstate_node_or_are_ignored = self
530 537 .traverse_fs_directory_and_dirstate(
531 538 &is_ignored,
532 539 dirstate_node.children(self.dmap.on_disk)?,
533 540 hg_path,
534 541 fs_entry,
535 542 dirstate_node.cached_directory_mtime()?,
536 543 is_at_repo_root,
537 544 )?;
538 545 self.maybe_save_directory_mtime(
539 546 children_all_have_dirstate_node_or_are_ignored,
540 547 fs_entry,
541 548 dirstate_node,
542 549 outdated_dircache,
543 550 )?
544 551 } else {
545 552 if file_or_symlink && self.matcher.matches(hg_path) {
546 553 if let Some(entry) = dirstate_node.entry()? {
547 554 if !entry.any_tracked() {
548 555 // Forward-compat if we start tracking unknown/ignored
549 556 // files for caching reasons
550 557 self.mark_unknown_or_ignored(
551 558 has_ignored_ancestor.force(&self.ignore_fn),
552 559 hg_path,
553 560 );
554 561 }
555 562 if entry.added() {
556 563 self.push_outcome(Outcome::Added, &dirstate_node)?;
557 564 } else if entry.removed() {
558 565 self.push_outcome(Outcome::Removed, &dirstate_node)?;
559 566 } else if entry.modified() {
560 567 self.push_outcome(Outcome::Modified, &dirstate_node)?;
561 568 } else {
562 569 self.handle_normal_file(&dirstate_node, fs_entry)?;
563 570 }
564 571 } else {
565 572 // `node.entry.is_none()` indicates a "directory"
566 573 // node, but the filesystem has a file
567 574 self.mark_unknown_or_ignored(
568 575 has_ignored_ancestor.force(&self.ignore_fn),
569 576 hg_path,
570 577 );
571 578 }
572 579 }
573 580
574 581 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
575 582 {
576 583 self.traverse_dirstate_only(child_node)?
577 584 }
578 585 }
579 586 Ok(())
580 587 }
581 588
582 589 /// Save directory mtime if applicable.
583 590 ///
584 591 /// `outdated_directory_cache` is `true` if we've just invalidated the
585 592 /// cache for this directory in `check_for_outdated_directory_cache`,
586 593 /// which forces the update.
587 594 fn maybe_save_directory_mtime(
588 595 &self,
589 596 children_all_have_dirstate_node_or_are_ignored: bool,
590 597 directory_entry: &DirEntry,
591 598 dirstate_node: NodeRef<'tree, 'on_disk>,
592 599 outdated_directory_cache: bool,
593 600 ) -> Result<(), DirstateV2ParseError> {
594 601 if !children_all_have_dirstate_node_or_are_ignored {
595 602 return Ok(());
596 603 }
597 604 // All filesystem directory entries from `read_dir` have a
598 605 // corresponding node in the dirstate, so we can reconstitute the
599 606 // names of those entries without calling `read_dir` again.
600 607
601 608 // TODO: use let-else here and below when available:
602 609 // https://github.com/rust-lang/rust/issues/87335
603 610 let status_start = if let Some(status_start) =
604 611 &self.filesystem_time_at_status_start
605 612 {
606 613 status_start
607 614 } else {
608 615 return Ok(());
609 616 };
610 617
611 618 // Although the Rust standard library’s `SystemTime` type
612 619 // has nanosecond precision, the times reported for a
613 620 // directory’s (or file’s) modified time may have lower
614 621 // resolution based on the filesystem (for example ext3
615 622 // only stores integer seconds), kernel (see
616 623 // https://stackoverflow.com/a/14393315/1162888), etc.
617 624 let metadata = match directory_entry.symlink_metadata() {
618 625 Ok(meta) => meta,
619 626 Err(_) => return Ok(()),
620 627 };
621 628
622 629 let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of(
623 630 &metadata,
624 631 status_start,
625 632 ) {
626 633 Ok(Some(directory_mtime)) => directory_mtime,
627 634 Ok(None) => {
628 635 // The directory was modified too recently,
629 636 // don’t cache its `read_dir` results.
630 637 //
631 638 // 1. A change to this directory (direct child was
632 639 // added or removed) cause its mtime to be set
633 640 // (possibly truncated) to `directory_mtime`
634 641 // 2. This `status` algorithm calls `read_dir`
635 642 // 3. An other change is made to the same directory is
636 643 // made so that calling `read_dir` agin would give
637 644 // different results, but soon enough after 1. that
638 645 // the mtime stays the same
639 646 //
640 647 // On a system where the time resolution poor, this
641 648 // scenario is not unlikely if all three steps are caused
642 649 // by the same script.
643 650 return Ok(());
644 651 }
645 652 Err(_) => {
646 653 // OS/libc does not support mtime?
647 654 return Ok(());
648 655 }
649 656 };
650 657 // We’ve observed (through `status_start`) that time has
651 658 // “progressed” since `directory_mtime`, so any further
652 659 // change to this directory is extremely likely to cause a
653 660 // different mtime.
654 661 //
655 662 // Having the same mtime again is not entirely impossible
656 663 // since the system clock is not monotonous. It could jump
657 664 // backward to some point before `directory_mtime`, then a
658 665 // directory change could potentially happen during exactly
659 666 // the wrong tick.
660 667 //
661 668 // We deem this scenario (unlike the previous one) to be
662 669 // unlikely enough in practice.
663 670
664 671 let is_up_to_date = if let Some(cached) =
665 672 dirstate_node.cached_directory_mtime()?
666 673 {
667 674 !outdated_directory_cache && cached.likely_equal(directory_mtime)
668 675 } else {
669 676 false
670 677 };
671 678 if !is_up_to_date {
672 679 let hg_path = dirstate_node
673 680 .full_path_borrowed(self.dmap.on_disk)?
674 681 .detach_from_tree();
675 682 self.new_cacheable_directories
676 683 .lock()
677 684 .unwrap()
678 685 .push((hg_path, directory_mtime))
679 686 }
680 687 Ok(())
681 688 }
682 689
683 690 /// A file that is clean in the dirstate was found in the filesystem
684 691 fn handle_normal_file(
685 692 &self,
686 693 dirstate_node: &NodeRef<'tree, 'on_disk>,
687 694 fs_entry: &DirEntry,
688 695 ) -> Result<(), DirstateV2ParseError> {
689 696 // Keep the low 31 bits
690 697 fn truncate_u64(value: u64) -> i32 {
691 698 (value & 0x7FFF_FFFF) as i32
692 699 }
693 700
694 701 let fs_metadata = match fs_entry.symlink_metadata() {
695 702 Ok(meta) => meta,
696 703 Err(_) => return Ok(()),
697 704 };
698 705
699 706 let entry = dirstate_node
700 707 .entry()?
701 708 .expect("handle_normal_file called with entry-less node");
702 709 let mode_changed =
703 710 || self.options.check_exec && entry.mode_changed(&fs_metadata);
704 711 let size = entry.size();
705 712 let size_changed = size != truncate_u64(fs_metadata.len());
706 713 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
707 714 // issue6456: Size returned may be longer due to encryption
708 715 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
709 716 self.push_outcome(Outcome::Unsure, dirstate_node)?
710 717 } else if dirstate_node.has_copy_source()
711 718 || entry.is_from_other_parent()
712 719 || (size >= 0 && (size_changed || mode_changed()))
713 720 {
714 721 self.push_outcome(Outcome::Modified, dirstate_node)?
715 722 } else {
716 723 let mtime_looks_clean = if let Some(dirstate_mtime) =
717 724 entry.truncated_mtime()
718 725 {
719 726 let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
720 727 .expect("OS/libc does not support mtime?");
721 728 // There might be a change in the future if for example the
722 729 // internal clock become off while process run, but this is a
723 730 // case where the issues the user would face
724 731 // would be a lot worse and there is nothing we
725 732 // can really do.
726 733 fs_mtime.likely_equal(dirstate_mtime)
727 734 } else {
728 735 // No mtime in the dirstate entry
729 736 false
730 737 };
731 738 if !mtime_looks_clean {
732 739 self.push_outcome(Outcome::Unsure, dirstate_node)?
733 740 } else if self.options.list_clean {
734 741 self.push_outcome(Outcome::Clean, dirstate_node)?
735 742 }
736 743 }
737 744 Ok(())
738 745 }
739 746
740 747 /// A node in the dirstate tree has no corresponding filesystem entry
741 748 fn traverse_dirstate_only(
742 749 &self,
743 750 dirstate_node: NodeRef<'tree, 'on_disk>,
744 751 ) -> Result<(), DirstateV2ParseError> {
745 752 self.check_for_outdated_directory_cache(&dirstate_node)?;
746 753 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
747 754 dirstate_node
748 755 .children(self.dmap.on_disk)?
749 756 .par_iter()
750 757 .map(|child_node| self.traverse_dirstate_only(child_node))
751 758 .collect()
752 759 }
753 760
754 761 /// A node in the dirstate tree has no corresponding *file* on the
755 762 /// filesystem
756 763 ///
757 764 /// Does nothing on a "directory" node
758 765 fn mark_removed_or_deleted_if_file(
759 766 &self,
760 767 dirstate_node: &NodeRef<'tree, 'on_disk>,
761 768 ) -> Result<(), DirstateV2ParseError> {
762 769 if let Some(entry) = dirstate_node.entry()? {
763 770 if !entry.any_tracked() {
764 771 // Future-compat for when we start storing ignored and unknown
765 772 // files for caching reasons
766 773 return Ok(());
767 774 }
768 775 let path = dirstate_node.full_path(self.dmap.on_disk)?;
769 776 if self.matcher.matches(path) {
770 777 if entry.removed() {
771 778 self.push_outcome(Outcome::Removed, dirstate_node)?
772 779 } else {
773 780 self.push_outcome(Outcome::Deleted, dirstate_node)?
774 781 }
775 782 }
776 783 }
777 784 Ok(())
778 785 }
779 786
780 787 /// Something in the filesystem has no corresponding dirstate node
781 788 ///
782 789 /// Returns whether that path is ignored
783 790 fn traverse_fs_only(
784 791 &self,
785 792 has_ignored_ancestor: bool,
786 793 directory_hg_path: &HgPath,
787 794 fs_entry: &DirEntry,
788 795 ) -> bool {
789 796 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
790 797 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
791 798 if fs_entry.is_dir() {
792 799 let is_ignored =
793 800 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
794 801 let traverse_children = if is_ignored {
795 802 // Descendants of an ignored directory are all ignored
796 803 self.options.list_ignored
797 804 } else {
798 805 // Descendants of an unknown directory may be either unknown or
799 806 // ignored
800 807 self.options.list_unknown || self.options.list_ignored
801 808 };
802 809 if traverse_children {
803 810 let is_at_repo_root = false;
804 811 if let Ok(children_fs_entries) =
805 812 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
806 813 {
807 814 children_fs_entries.par_iter().for_each(|child_fs_entry| {
808 815 self.traverse_fs_only(
809 816 is_ignored,
810 817 &hg_path,
811 818 child_fs_entry,
812 819 );
813 820 })
814 821 }
815 822 if self.options.collect_traversed_dirs {
816 823 self.outcome.lock().unwrap().traversed.push(hg_path.into())
817 824 }
818 825 }
819 826 is_ignored
820 827 } else if file_or_symlink {
821 828 if self.matcher.matches(&hg_path) {
822 829 self.mark_unknown_or_ignored(
823 830 has_ignored_ancestor,
824 831 &BorrowedPath::InMemory(&hg_path),
825 832 )
826 833 } else {
827 834 // We haven’t computed whether this path is ignored. It
828 835 // might not be, and a future run of status might have a
829 836 // different matcher that matches it. So treat it as not
830 837 // ignored. That is, inhibit readdir caching of the parent
831 838 // directory.
832 839 false
833 840 }
834 841 } else {
835 842 // This is neither a directory, a plain file, or a symlink.
836 843 // Treat it like an ignored file.
837 844 true
838 845 }
839 846 }
840 847
841 848 /// Returns whether that path is ignored
842 849 fn mark_unknown_or_ignored(
843 850 &self,
844 851 has_ignored_ancestor: bool,
845 852 hg_path: &BorrowedPath<'_, 'on_disk>,
846 853 ) -> bool {
847 854 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
848 855 if is_ignored {
849 856 if self.options.list_ignored {
850 857 self.push_outcome_without_copy_source(
851 858 Outcome::Ignored,
852 859 hg_path,
853 860 )
854 861 }
855 862 } else if self.options.list_unknown {
856 863 self.push_outcome_without_copy_source(Outcome::Unknown, hg_path)
857 864 }
858 865 is_ignored
859 866 }
860 867 }
861 868
862 869 /// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
863 870 /// care about.
864 871 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
865 872 enum FakeFileType {
866 873 File,
867 874 Directory,
868 875 Symlink,
876 BadType(BadType),
869 877 }
870 878
871 impl TryFrom<std::fs::FileType> for FakeFileType {
872 type Error = ();
873
874 fn try_from(f: std::fs::FileType) -> Result<Self, Self::Error> {
879 impl From<std::fs::FileType> for FakeFileType {
880 fn from(f: std::fs::FileType) -> Self {
875 881 if f.is_dir() {
876 Ok(Self::Directory)
882 Self::Directory
877 883 } else if f.is_file() {
878 Ok(Self::File)
884 Self::File
879 885 } else if f.is_symlink() {
880 Ok(Self::Symlink)
886 Self::Symlink
887 } else if f.is_fifo() {
888 Self::BadType(BadType::FIFO)
889 } else if f.is_block_device() {
890 Self::BadType(BadType::BlockDevice)
891 } else if f.is_char_device() {
892 Self::BadType(BadType::CharacterDevice)
893 } else if f.is_socket() {
894 Self::BadType(BadType::Socket)
881 895 } else {
882 // Things like FIFO etc.
883 Err(())
896 Self::BadType(BadType::Unknown)
884 897 }
885 898 }
886 899 }
887 900
888 901 struct DirEntry<'a> {
889 902 /// Path as stored in the dirstate, or just the filename for optimization.
890 903 hg_path: HgPathCow<'a>,
891 904 /// Filesystem path
892 905 fs_path: Cow<'a, Path>,
893 906 /// Lazily computed
894 907 symlink_metadata: Option<std::fs::Metadata>,
895 908 /// Already computed for ergonomics.
896 909 file_type: FakeFileType,
897 910 }
898 911
899 912 impl<'a> DirEntry<'a> {
900 913 /// Returns **unsorted** entries in the given directory, with name,
901 914 /// metadata and file type.
902 915 ///
903 916 /// If a `.hg` sub-directory is encountered:
904 917 ///
905 918 /// * At the repository root, ignore that sub-directory
906 919 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
907 920 /// list instead.
908 921 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
909 922 // `read_dir` returns a "not found" error for the empty path
910 923 let at_cwd = path == Path::new("");
911 924 let read_dir_path = if at_cwd { Path::new(".") } else { path };
912 925 let mut results = Vec::new();
913 926 for entry in read_dir_path.read_dir()? {
914 927 let entry = entry?;
915 928 let file_type = match entry.file_type() {
916 929 Ok(v) => v,
917 930 Err(e) => {
918 931 // race with file deletion?
919 932 if e.kind() == std::io::ErrorKind::NotFound {
920 933 continue;
921 934 } else {
922 935 return Err(e);
923 936 }
924 937 }
925 938 };
926 939 let file_name = entry.file_name();
927 940 // FIXME don't do this when cached
928 941 if file_name == ".hg" {
929 942 if is_at_repo_root {
930 943 // Skip the repo’s own .hg (might be a symlink)
931 944 continue;
932 945 } else if file_type.is_dir() {
933 946 // A .hg sub-directory at another location means a subrepo,
934 947 // skip it entirely.
935 948 return Ok(Vec::new());
936 949 }
937 950 }
938 951 let full_path = if at_cwd {
939 952 file_name.clone().into()
940 953 } else {
941 954 entry.path()
942 955 };
943 956 let filename =
944 957 Cow::Owned(get_bytes_from_os_string(file_name).into());
945 let file_type = match FakeFileType::try_from(file_type) {
946 Ok(file_type) => file_type,
947 Err(_) => continue,
948 };
958 let file_type = FakeFileType::from(file_type);
949 959 results.push(DirEntry {
950 960 hg_path: filename,
951 961 fs_path: Cow::Owned(full_path.to_path_buf()),
952 962 symlink_metadata: None,
953 963 file_type,
954 964 })
955 965 }
956 966 Ok(results)
957 967 }
958 968
959 969 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
960 970 match &self.symlink_metadata {
961 971 Some(meta) => Ok(meta.clone()),
962 972 None => std::fs::symlink_metadata(&self.fs_path),
963 973 }
964 974 }
965 975
966 976 fn is_dir(&self) -> bool {
967 977 self.file_type == FakeFileType::Directory
968 978 }
969 979
970 980 fn is_file(&self) -> bool {
971 981 self.file_type == FakeFileType::File
972 982 }
973 983
974 984 fn is_symlink(&self) -> bool {
975 985 self.file_type == FakeFileType::Symlink
976 986 }
987
988 fn is_bad(&self) -> Option<BadType> {
989 match self.file_type {
990 FakeFileType::BadType(ty) => Some(ty),
991 _ => None,
992 }
993 }
977 994 }
978 995
979 996 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
980 997 /// of the give repository.
981 998 ///
982 999 /// This is similar to `SystemTime::now()`, with the result truncated to the
983 1000 /// same time resolution as other files’ modification times. Using `.hg`
984 1001 /// instead of the system’s default temporary directory (such as `/tmp`) makes
985 1002 /// it more likely the temporary file is in the same disk partition as contents
986 1003 /// of the working directory, which can matter since different filesystems may
987 1004 /// store timestamps with different resolutions.
988 1005 ///
989 1006 /// This may fail, typically if we lack write permissions. In that case we
990 1007 /// should continue the `status()` algoritm anyway and consider the current
991 1008 /// date/time to be unknown.
992 1009 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
993 1010 tempfile::tempfile_in(repo_root.join(".hg"))?
994 1011 .metadata()?
995 1012 .modified()
996 1013 }
General Comments 0
You need to be logged in to leave comments. Login now