##// END OF EJS Templates
rust-status: query fs traversal metadata lazily...
Raphaël Gomès -
r50459:da48f170 default
parent child Browse files
Show More
@@ -1,903 +1,975 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::Matcher;
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_path_from_bytes;
14 14 use crate::utils::hg_path::HgPath;
15 15 use crate::BadMatch;
16 16 use crate::DirstateStatus;
17 use crate::HgPathBuf;
18 17 use crate::HgPathCow;
19 18 use crate::PatternFileWarning;
20 19 use crate::StatusError;
21 20 use crate::StatusOptions;
22 21 use micro_timer::timed;
23 22 use once_cell::sync::OnceCell;
24 23 use rayon::prelude::*;
25 24 use sha1::{Digest, Sha1};
26 25 use std::borrow::Cow;
26 use std::convert::TryFrom;
27 use std::convert::TryInto;
27 28 use std::io;
28 29 use std::path::Path;
29 30 use std::path::PathBuf;
30 31 use std::sync::Mutex;
31 32 use std::time::SystemTime;
32 33
33 34 /// Returns the status of the working directory compared to its parent
34 35 /// changeset.
35 36 ///
36 37 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 38 /// and variable names) and dirstate tree at the same time. The core of this
38 39 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 40 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 41 /// exists in one of the two trees, depending on information requested by
41 42 /// `options` we may need to traverse the remaining subtree.
42 43 #[timed]
43 44 pub fn status<'dirstate>(
44 45 dmap: &'dirstate mut DirstateMap,
45 46 matcher: &(dyn Matcher + Sync),
46 47 root_dir: PathBuf,
47 48 ignore_files: Vec<PathBuf>,
48 49 options: StatusOptions,
49 50 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
50 51 {
51 52 // Force the global rayon threadpool to not exceed 16 concurrent threads.
52 53 // This is a stop-gap measure until we figure out why using more than 16
53 54 // threads makes `status` slower for each additional thread.
54 55 // We use `ok()` in case the global threadpool has already been
55 56 // instantiated in `rhg` or some other caller.
56 57 // TODO find the underlying cause and fix it, then remove this.
57 58 rayon::ThreadPoolBuilder::new()
58 59 .num_threads(16)
59 60 .build_global()
60 61 .ok();
61 62
62 63 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
63 64 if options.list_ignored || options.list_unknown {
64 65 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
65 66 DirstateVersion::V1 => {
66 67 let (ignore_fn, warnings) = get_ignore_function(
67 68 ignore_files,
68 69 &root_dir,
69 70 &mut |_pattern_bytes| {},
70 71 )?;
71 72 (ignore_fn, warnings, None)
72 73 }
73 74 DirstateVersion::V2 => {
74 75 let mut hasher = Sha1::new();
75 76 let (ignore_fn, warnings) = get_ignore_function(
76 77 ignore_files,
77 78 &root_dir,
78 79 &mut |pattern_bytes| hasher.update(pattern_bytes),
79 80 )?;
80 81 let new_hash = *hasher.finalize().as_ref();
81 82 let changed = new_hash != dmap.ignore_patterns_hash;
82 83 dmap.ignore_patterns_hash = new_hash;
83 84 (ignore_fn, warnings, Some(changed))
84 85 }
85 86 };
86 87 (ignore_fn, warnings, changed)
87 88 } else {
88 89 (Box::new(|&_| true), vec![], None)
89 90 };
90 91
91 92 let filesystem_time_at_status_start =
92 93 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
93 94
94 95 // If the repository is under the current directory, prefer using a
95 96 // relative path, so the kernel needs to traverse fewer directories in every
96 97 // call to `read_dir` or `symlink_metadata`.
97 98 // This is effective in the common case where the current directory is the
98 99 // repository root.
99 100
100 101 // TODO: Better yet would be to use libc functions like `openat` and
101 102 // `fstatat` to remove such repeated traversals entirely, but the standard
102 103 // library does not provide APIs based on those.
103 104 // Maybe with a crate like https://crates.io/crates/openat instead?
104 105 let root_dir = if let Some(relative) = std::env::current_dir()
105 106 .ok()
106 107 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
107 108 {
108 109 relative
109 110 } else {
110 111 &root_dir
111 112 };
112 113
113 114 let outcome = DirstateStatus {
114 115 filesystem_time_at_status_start,
115 116 ..Default::default()
116 117 };
117 118 let common = StatusCommon {
118 119 dmap,
119 120 options,
120 121 matcher,
121 122 ignore_fn,
122 123 outcome: Mutex::new(outcome),
123 124 ignore_patterns_have_changed: patterns_changed,
124 125 new_cachable_directories: Default::default(),
125 126 outated_cached_directories: Default::default(),
126 127 filesystem_time_at_status_start,
127 128 };
128 129 let is_at_repo_root = true;
129 130 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
130 131 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
131 132 let root_cached_mtime = None;
132 let root_dir_metadata = None;
133 133 // If the path we have for the repository root is a symlink, do follow it.
134 134 // (As opposed to symlinks within the working directory which are not
135 135 // followed, using `std::fs::symlink_metadata`.)
136 136 common.traverse_fs_directory_and_dirstate(
137 137 &has_ignored_ancestor,
138 138 dmap.root.as_ref(),
139 139 hg_path,
140 &root_dir,
141 root_dir_metadata,
140 &DirEntry {
141 hg_path: Cow::Borrowed(HgPath::new(b"")),
142 fs_path: Cow::Borrowed(&root_dir),
143 symlink_metadata: None,
144 file_type: FakeFileType::Directory,
145 },
142 146 root_cached_mtime,
143 147 is_at_repo_root,
144 148 )?;
145 149 let mut outcome = common.outcome.into_inner().unwrap();
146 150 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
147 151 let outdated = common.outated_cached_directories.into_inner().unwrap();
148 152
149 153 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
150 154 || !outdated.is_empty()
151 155 || (!new_cachable.is_empty()
152 156 && dmap.dirstate_version == DirstateVersion::V2);
153 157
154 158 // Remove outdated mtimes before adding new mtimes, in case a given
155 159 // directory is both
156 160 for path in &outdated {
157 161 dmap.clear_cached_mtime(path)?;
158 162 }
159 163 for (path, mtime) in &new_cachable {
160 164 dmap.set_cached_mtime(path, *mtime)?;
161 165 }
162 166
163 167 Ok((outcome, warnings))
164 168 }
165 169
166 170 /// Bag of random things needed by various parts of the algorithm. Reduces the
167 171 /// number of parameters passed to functions.
168 172 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
169 173 dmap: &'tree DirstateMap<'on_disk>,
170 174 options: StatusOptions,
171 175 matcher: &'a (dyn Matcher + Sync),
172 176 ignore_fn: IgnoreFnType<'a>,
173 177 outcome: Mutex<DirstateStatus<'on_disk>>,
174 178 new_cachable_directories:
175 179 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
176 180 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
177 181
178 182 /// Whether ignore files like `.hgignore` have changed since the previous
179 183 /// time a `status()` call wrote their hash to the dirstate. `None` means
180 184 /// we don’t know as this run doesn’t list either ignored or unknown files
181 185 /// and therefore isn’t reading `.hgignore`.
182 186 ignore_patterns_have_changed: Option<bool>,
183 187
184 188 /// The current time at the start of the `status()` algorithm, as measured
185 189 /// and possibly truncated by the filesystem.
186 190 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
187 191 }
188 192
189 193 enum Outcome {
190 194 Modified,
191 195 Added,
192 196 Removed,
193 197 Deleted,
194 198 Clean,
195 199 Ignored,
196 200 Unknown,
197 201 Unsure,
198 202 }
199 203
200 204 /// Lazy computation of whether a given path has a hgignored
201 205 /// ancestor.
202 206 struct HasIgnoredAncestor<'a> {
203 207 /// `path` and `parent` constitute the inputs to the computation,
204 208 /// `cache` stores the outcome.
205 209 path: &'a HgPath,
206 210 parent: Option<&'a HasIgnoredAncestor<'a>>,
207 211 cache: OnceCell<bool>,
208 212 }
209 213
210 214 impl<'a> HasIgnoredAncestor<'a> {
211 215 fn create(
212 216 parent: Option<&'a HasIgnoredAncestor<'a>>,
213 217 path: &'a HgPath,
214 218 ) -> HasIgnoredAncestor<'a> {
215 219 Self {
216 220 path,
217 221 parent,
218 222 cache: OnceCell::new(),
219 223 }
220 224 }
221 225
222 226 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
223 227 match self.parent {
224 228 None => false,
225 229 Some(parent) => {
226 230 *(parent.cache.get_or_init(|| {
227 231 parent.force(ignore_fn) || ignore_fn(&self.path)
228 232 }))
229 233 }
230 234 }
231 235 }
232 236 }
233 237
234 238 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
235 239 fn push_outcome(
236 240 &self,
237 241 which: Outcome,
238 242 dirstate_node: &NodeRef<'tree, 'on_disk>,
239 243 ) -> Result<(), DirstateV2ParseError> {
240 244 let path = dirstate_node
241 245 .full_path_borrowed(self.dmap.on_disk)?
242 246 .detach_from_tree();
243 247 let copy_source = if self.options.list_copies {
244 248 dirstate_node
245 249 .copy_source_borrowed(self.dmap.on_disk)?
246 250 .map(|source| source.detach_from_tree())
247 251 } else {
248 252 None
249 253 };
250 254 self.push_outcome_common(which, path, copy_source);
251 255 Ok(())
252 256 }
253 257
254 258 fn push_outcome_without_copy_source(
255 259 &self,
256 260 which: Outcome,
257 261 path: &BorrowedPath<'_, 'on_disk>,
258 262 ) {
259 263 self.push_outcome_common(which, path.detach_from_tree(), None)
260 264 }
261 265
262 266 fn push_outcome_common(
263 267 &self,
264 268 which: Outcome,
265 269 path: HgPathCow<'on_disk>,
266 270 copy_source: Option<HgPathCow<'on_disk>>,
267 271 ) {
268 272 let mut outcome = self.outcome.lock().unwrap();
269 273 let vec = match which {
270 274 Outcome::Modified => &mut outcome.modified,
271 275 Outcome::Added => &mut outcome.added,
272 276 Outcome::Removed => &mut outcome.removed,
273 277 Outcome::Deleted => &mut outcome.deleted,
274 278 Outcome::Clean => &mut outcome.clean,
275 279 Outcome::Ignored => &mut outcome.ignored,
276 280 Outcome::Unknown => &mut outcome.unknown,
277 281 Outcome::Unsure => &mut outcome.unsure,
278 282 };
279 283 vec.push(StatusPath { path, copy_source });
280 284 }
281 285
282 286 fn read_dir(
283 287 &self,
284 288 hg_path: &HgPath,
285 289 fs_path: &Path,
286 290 is_at_repo_root: bool,
287 291 ) -> Result<Vec<DirEntry>, ()> {
288 292 DirEntry::read_dir(fs_path, is_at_repo_root)
289 293 .map_err(|error| self.io_error(error, hg_path))
290 294 }
291 295
292 296 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
293 297 let errno = error.raw_os_error().expect("expected real OS error");
294 298 self.outcome
295 299 .lock()
296 300 .unwrap()
297 301 .bad
298 302 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
299 303 }
300 304
301 305 fn check_for_outdated_directory_cache(
302 306 &self,
303 307 dirstate_node: &NodeRef<'tree, 'on_disk>,
304 308 ) -> Result<(), DirstateV2ParseError> {
305 309 if self.ignore_patterns_have_changed == Some(true)
306 310 && dirstate_node.cached_directory_mtime()?.is_some()
307 311 {
308 312 self.outated_cached_directories.lock().unwrap().push(
309 313 dirstate_node
310 314 .full_path_borrowed(self.dmap.on_disk)?
311 315 .detach_from_tree(),
312 316 )
313 317 }
314 318 Ok(())
315 319 }
316 320
317 321 /// If this returns true, we can get accurate results by only using
318 322 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
319 323 /// need to call `read_dir`.
320 324 fn can_skip_fs_readdir(
321 325 &self,
322 directory_metadata: Option<&std::fs::Metadata>,
326 directory_entry: &DirEntry,
323 327 cached_directory_mtime: Option<TruncatedTimestamp>,
324 328 ) -> bool {
325 329 if !self.options.list_unknown && !self.options.list_ignored {
326 330 // All states that we care about listing have corresponding
327 331 // dirstate entries.
328 332 // This happens for example with `hg status -mard`.
329 333 return true;
330 334 }
331 335 if !self.options.list_ignored
332 336 && self.ignore_patterns_have_changed == Some(false)
333 337 {
334 338 if let Some(cached_mtime) = cached_directory_mtime {
335 339 // The dirstate contains a cached mtime for this directory, set
336 340 // by a previous run of the `status` algorithm which found this
337 341 // directory eligible for `read_dir` caching.
338 if let Some(meta) = directory_metadata {
342 if let Ok(meta) = directory_entry.symlink_metadata() {
339 343 if cached_mtime
340 .likely_equal_to_mtime_of(meta)
344 .likely_equal_to_mtime_of(&meta)
341 345 .unwrap_or(false)
342 346 {
343 347 // The mtime of that directory has not changed
344 348 // since then, which means that the results of
345 349 // `read_dir` should also be unchanged.
346 350 return true;
347 351 }
348 352 }
349 353 }
350 354 }
351 355 false
352 356 }
353 357
354 358 /// Returns whether all child entries of the filesystem directory have a
355 359 /// corresponding dirstate node or are ignored.
356 360 fn traverse_fs_directory_and_dirstate<'ancestor>(
357 361 &self,
358 362 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
359 363 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
360 364 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
361 directory_fs_path: &Path,
362 directory_metadata: Option<&std::fs::Metadata>,
365 directory_entry: &DirEntry,
363 366 cached_directory_mtime: Option<TruncatedTimestamp>,
364 367 is_at_repo_root: bool,
365 368 ) -> Result<bool, DirstateV2ParseError> {
366 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
367 {
369 if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
368 370 dirstate_nodes
369 371 .par_iter()
370 372 .map(|dirstate_node| {
371 let fs_path = directory_fs_path.join(get_path_from_bytes(
373 let fs_path = &directory_entry.fs_path;
374 let fs_path = fs_path.join(get_path_from_bytes(
372 375 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
373 376 ));
374 377 match std::fs::symlink_metadata(&fs_path) {
375 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
376 &fs_path,
377 &fs_metadata,
378 dirstate_node,
379 has_ignored_ancestor,
380 ),
378 Ok(fs_metadata) => {
379 let file_type =
380 match fs_metadata.file_type().try_into() {
381 Ok(file_type) => file_type,
382 Err(_) => return Ok(()),
383 };
384 let entry = DirEntry {
385 hg_path: Cow::Borrowed(
386 dirstate_node
387 .full_path(&self.dmap.on_disk)?,
388 ),
389 fs_path: Cow::Borrowed(&fs_path),
390 symlink_metadata: Some(fs_metadata),
391 file_type,
392 };
393 self.traverse_fs_and_dirstate(
394 &entry,
395 dirstate_node,
396 has_ignored_ancestor,
397 )
398 }
381 399 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
382 400 self.traverse_dirstate_only(dirstate_node)
383 401 }
384 402 Err(error) => {
385 403 let hg_path =
386 404 dirstate_node.full_path(self.dmap.on_disk)?;
387 405 Ok(self.io_error(error, hg_path))
388 406 }
389 407 }
390 408 })
391 409 .collect::<Result<_, _>>()?;
392 410
393 411 // We don’t know, so conservatively say this isn’t the case
394 412 let children_all_have_dirstate_node_or_are_ignored = false;
395 413
396 414 return Ok(children_all_have_dirstate_node_or_are_ignored);
397 415 }
398 416
399 417 let mut fs_entries = if let Ok(entries) = self.read_dir(
400 418 directory_hg_path,
401 directory_fs_path,
419 &directory_entry.fs_path,
402 420 is_at_repo_root,
403 421 ) {
404 422 entries
405 423 } else {
406 424 // Treat an unreadable directory (typically because of insufficient
407 425 // permissions) like an empty directory. `self.read_dir` has
408 426 // already called `self.io_error` so a warning will be emitted.
409 427 Vec::new()
410 428 };
411 429
412 430 // `merge_join_by` requires both its input iterators to be sorted:
413 431
414 432 let dirstate_nodes = dirstate_nodes.sorted();
415 433 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
416 434 // https://github.com/rust-lang/rust/issues/34162
417 435 fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
418 436
419 437 // Propagate here any error that would happen inside the comparison
420 438 // callback below
421 439 for dirstate_node in &dirstate_nodes {
422 440 dirstate_node.base_name(self.dmap.on_disk)?;
423 441 }
424 442 itertools::merge_join_by(
425 443 dirstate_nodes,
426 444 &fs_entries,
427 445 |dirstate_node, fs_entry| {
428 446 // This `unwrap` never panics because we already propagated
429 447 // those errors above
430 448 dirstate_node
431 449 .base_name(self.dmap.on_disk)
432 450 .unwrap()
433 451 .cmp(&fs_entry.hg_path)
434 452 },
435 453 )
436 454 .par_bridge()
437 455 .map(|pair| {
438 456 use itertools::EitherOrBoth::*;
439 457 let has_dirstate_node_or_is_ignored;
440 458 match pair {
441 459 Both(dirstate_node, fs_entry) => {
442 460 self.traverse_fs_and_dirstate(
443 &fs_entry.fs_path,
444 &fs_entry.metadata,
461 &fs_entry,
445 462 dirstate_node,
446 463 has_ignored_ancestor,
447 464 )?;
448 465 has_dirstate_node_or_is_ignored = true
449 466 }
450 467 Left(dirstate_node) => {
451 468 self.traverse_dirstate_only(dirstate_node)?;
452 469 has_dirstate_node_or_is_ignored = true;
453 470 }
454 471 Right(fs_entry) => {
455 472 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
456 473 has_ignored_ancestor.force(&self.ignore_fn),
457 474 directory_hg_path,
458 475 fs_entry,
459 476 )
460 477 }
461 478 }
462 479 Ok(has_dirstate_node_or_is_ignored)
463 480 })
464 481 .try_reduce(|| true, |a, b| Ok(a && b))
465 482 }
466 483
467 484 fn traverse_fs_and_dirstate<'ancestor>(
468 485 &self,
469 fs_path: &Path,
470 fs_metadata: &std::fs::Metadata,
486 fs_entry: &DirEntry,
471 487 dirstate_node: NodeRef<'tree, 'on_disk>,
472 488 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
473 489 ) -> Result<(), DirstateV2ParseError> {
474 490 self.check_for_outdated_directory_cache(&dirstate_node)?;
475 491 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
476 let file_type = fs_metadata.file_type();
477 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
492 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
478 493 if !file_or_symlink {
479 494 // If we previously had a file here, it was removed (with
480 495 // `hg rm` or similar) or deleted before it could be
481 496 // replaced by a directory or something else.
482 497 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
483 498 }
484 if file_type.is_dir() {
499 if fs_entry.is_dir() {
485 500 if self.options.collect_traversed_dirs {
486 501 self.outcome
487 502 .lock()
488 503 .unwrap()
489 504 .traversed
490 505 .push(hg_path.detach_from_tree())
491 506 }
492 507 let is_ignored = HasIgnoredAncestor::create(
493 508 Some(&has_ignored_ancestor),
494 509 hg_path,
495 510 );
496 511 let is_at_repo_root = false;
497 512 let children_all_have_dirstate_node_or_are_ignored = self
498 513 .traverse_fs_directory_and_dirstate(
499 514 &is_ignored,
500 515 dirstate_node.children(self.dmap.on_disk)?,
501 516 hg_path,
502 fs_path,
503 Some(fs_metadata),
517 fs_entry,
504 518 dirstate_node.cached_directory_mtime()?,
505 519 is_at_repo_root,
506 520 )?;
507 521 self.maybe_save_directory_mtime(
508 522 children_all_have_dirstate_node_or_are_ignored,
509 fs_metadata,
523 fs_entry,
510 524 dirstate_node,
511 525 )?
512 526 } else {
513 527 if file_or_symlink && self.matcher.matches(&hg_path) {
514 528 if let Some(entry) = dirstate_node.entry()? {
515 529 if !entry.any_tracked() {
516 530 // Forward-compat if we start tracking unknown/ignored
517 531 // files for caching reasons
518 532 self.mark_unknown_or_ignored(
519 533 has_ignored_ancestor.force(&self.ignore_fn),
520 534 &hg_path,
521 535 );
522 536 }
523 537 if entry.added() {
524 538 self.push_outcome(Outcome::Added, &dirstate_node)?;
525 539 } else if entry.removed() {
526 540 self.push_outcome(Outcome::Removed, &dirstate_node)?;
527 541 } else if entry.modified() {
528 542 self.push_outcome(Outcome::Modified, &dirstate_node)?;
529 543 } else {
530 self.handle_normal_file(&dirstate_node, fs_metadata)?;
544 self.handle_normal_file(&dirstate_node, fs_entry)?;
531 545 }
532 546 } else {
533 547 // `node.entry.is_none()` indicates a "directory"
534 548 // node, but the filesystem has a file
535 549 self.mark_unknown_or_ignored(
536 550 has_ignored_ancestor.force(&self.ignore_fn),
537 551 hg_path,
538 552 );
539 553 }
540 554 }
541 555
542 556 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
543 557 {
544 558 self.traverse_dirstate_only(child_node)?
545 559 }
546 560 }
547 561 Ok(())
548 562 }
549 563
550 564 fn maybe_save_directory_mtime(
551 565 &self,
552 566 children_all_have_dirstate_node_or_are_ignored: bool,
553 directory_metadata: &std::fs::Metadata,
567 directory_entry: &DirEntry,
554 568 dirstate_node: NodeRef<'tree, 'on_disk>,
555 569 ) -> Result<(), DirstateV2ParseError> {
556 570 if !children_all_have_dirstate_node_or_are_ignored {
557 571 return Ok(());
558 572 }
559 573 // All filesystem directory entries from `read_dir` have a
560 574 // corresponding node in the dirstate, so we can reconstitute the
561 575 // names of those entries without calling `read_dir` again.
562 576
563 577 // TODO: use let-else here and below when available:
564 578 // https://github.com/rust-lang/rust/issues/87335
565 579 let status_start = if let Some(status_start) =
566 580 &self.filesystem_time_at_status_start
567 581 {
568 582 status_start
569 583 } else {
570 584 return Ok(());
571 585 };
572 586
573 587 // Although the Rust standard library’s `SystemTime` type
574 588 // has nanosecond precision, the times reported for a
575 589 // directory’s (or file’s) modified time may have lower
576 590 // resolution based on the filesystem (for example ext3
577 591 // only stores integer seconds), kernel (see
578 592 // https://stackoverflow.com/a/14393315/1162888), etc.
593 let metadata = match directory_entry.symlink_metadata() {
594 Ok(meta) => meta,
595 Err(_) => return Ok(()),
596 };
579 597 let directory_mtime = if let Ok(option) =
580 TruncatedTimestamp::for_reliable_mtime_of(
581 directory_metadata,
582 status_start,
583 ) {
598 TruncatedTimestamp::for_reliable_mtime_of(&metadata, status_start)
599 {
584 600 if let Some(directory_mtime) = option {
585 601 directory_mtime
586 602 } else {
587 603 // The directory was modified too recently,
588 604 // don’t cache its `read_dir` results.
589 605 //
590 606 // 1. A change to this directory (direct child was
591 607 // added or removed) causes its mtime to be set
592 608 // (possibly truncated) to `directory_mtime`
593 609 // 2. This `status` algorithm calls `read_dir`
594 610 // 3. Another change is made to the same directory
595 611 // so that calling `read_dir` again would give
596 612 // different results, but soon enough after 1. that
597 613 // the mtime stays the same
598 614 //
599 615 // On a system where the time resolution is poor, this
600 616 // scenario is not unlikely if all three steps are caused
601 617 // by the same script.
602 618 return Ok(());
603 619 }
604 620 } else {
605 621 // OS/libc does not support mtime?
606 622 return Ok(());
607 623 };
608 624 // We’ve observed (through `status_start`) that time has
609 625 // “progressed” since `directory_mtime`, so any further
610 626 // change to this directory is extremely likely to cause a
611 627 // different mtime.
612 628 //
613 629 // Having the same mtime again is not entirely impossible
614 630 // since the system clock is not monotonic. It could jump
615 631 // backward to some point before `directory_mtime`, then a
616 632 // directory change could potentially happen during exactly
617 633 // the wrong tick.
618 634 //
619 635 // We deem this scenario (unlike the previous one) to be
620 636 // unlikely enough in practice.
621 637
622 638 let is_up_to_date =
623 639 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
624 640 cached.likely_equal(directory_mtime)
625 641 } else {
626 642 false
627 643 };
628 644 if !is_up_to_date {
629 645 let hg_path = dirstate_node
630 646 .full_path_borrowed(self.dmap.on_disk)?
631 647 .detach_from_tree();
632 648 self.new_cachable_directories
633 649 .lock()
634 650 .unwrap()
635 651 .push((hg_path, directory_mtime))
636 652 }
637 653 Ok(())
638 654 }
639 655
640 656 /// A file that is clean in the dirstate was found in the filesystem
641 657 fn handle_normal_file(
642 658 &self,
643 659 dirstate_node: &NodeRef<'tree, 'on_disk>,
644 fs_metadata: &std::fs::Metadata,
660 fs_entry: &DirEntry,
645 661 ) -> Result<(), DirstateV2ParseError> {
646 662 // Keep the low 31 bits
647 663 fn truncate_u64(value: u64) -> i32 {
648 664 (value & 0x7FFF_FFFF) as i32
649 665 }
650 666
667 let fs_metadata = match fs_entry.symlink_metadata() {
668 Ok(meta) => meta,
669 Err(_) => return Ok(()),
670 };
671
651 672 let entry = dirstate_node
652 673 .entry()?
653 674 .expect("handle_normal_file called with entry-less node");
654 675 let mode_changed =
655 || self.options.check_exec && entry.mode_changed(fs_metadata);
676 || self.options.check_exec && entry.mode_changed(&fs_metadata);
656 677 let size = entry.size();
657 678 let size_changed = size != truncate_u64(fs_metadata.len());
658 679 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
659 680 // issue6456: Size returned may be longer due to encryption
660 681 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
661 682 self.push_outcome(Outcome::Unsure, dirstate_node)?
662 683 } else if dirstate_node.has_copy_source()
663 684 || entry.is_from_other_parent()
664 685 || (size >= 0 && (size_changed || mode_changed()))
665 686 {
666 687 self.push_outcome(Outcome::Modified, dirstate_node)?
667 688 } else {
668 689 let mtime_looks_clean;
669 690 if let Some(dirstate_mtime) = entry.truncated_mtime() {
670 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
691 let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
671 692 .expect("OS/libc does not support mtime?");
672 693 // There might be a change in the future if for example the
673 694 // internal clock becomes off while the process runs, but this is a
674 695 // case where the issues the user would face
675 696 // would be a lot worse and there is nothing we
676 697 // can really do.
677 698 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
678 699 } else {
679 700 // No mtime in the dirstate entry
680 701 mtime_looks_clean = false
681 702 };
682 703 if !mtime_looks_clean {
683 704 self.push_outcome(Outcome::Unsure, dirstate_node)?
684 705 } else if self.options.list_clean {
685 706 self.push_outcome(Outcome::Clean, dirstate_node)?
686 707 }
687 708 }
688 709 Ok(())
689 710 }
690 711
691 712 /// A node in the dirstate tree has no corresponding filesystem entry
692 713 fn traverse_dirstate_only(
693 714 &self,
694 715 dirstate_node: NodeRef<'tree, 'on_disk>,
695 716 ) -> Result<(), DirstateV2ParseError> {
696 717 self.check_for_outdated_directory_cache(&dirstate_node)?;
697 718 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
698 719 dirstate_node
699 720 .children(self.dmap.on_disk)?
700 721 .par_iter()
701 722 .map(|child_node| self.traverse_dirstate_only(child_node))
702 723 .collect()
703 724 }
704 725
705 726 /// A node in the dirstate tree has no corresponding *file* on the
706 727 /// filesystem
707 728 ///
708 729 /// Does nothing on a "directory" node
709 730 fn mark_removed_or_deleted_if_file(
710 731 &self,
711 732 dirstate_node: &NodeRef<'tree, 'on_disk>,
712 733 ) -> Result<(), DirstateV2ParseError> {
713 734 if let Some(entry) = dirstate_node.entry()? {
714 735 if !entry.any_tracked() {
715 736 // Future-compat for when we start storing ignored and unknown
716 737 // files for caching reasons
717 738 return Ok(());
718 739 }
719 740 let path = dirstate_node.full_path(self.dmap.on_disk)?;
720 741 if self.matcher.matches(path) {
721 742 if entry.removed() {
722 743 self.push_outcome(Outcome::Removed, dirstate_node)?
723 744 } else {
724 745 self.push_outcome(Outcome::Deleted, &dirstate_node)?
725 746 }
726 747 }
727 748 }
728 749 Ok(())
729 750 }
730 751
731 752 /// Something in the filesystem has no corresponding dirstate node
732 753 ///
733 754 /// Returns whether that path is ignored
734 755 fn traverse_fs_only(
735 756 &self,
736 757 has_ignored_ancestor: bool,
737 758 directory_hg_path: &HgPath,
738 759 fs_entry: &DirEntry,
739 760 ) -> bool {
740 761 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
741 let file_type = fs_entry.metadata.file_type();
742 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
743 if file_type.is_dir() {
762 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
763 if fs_entry.is_dir() {
744 764 let is_ignored =
745 765 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
746 766 let traverse_children = if is_ignored {
747 767 // Descendants of an ignored directory are all ignored
748 768 self.options.list_ignored
749 769 } else {
750 770 // Descendants of an unknown directory may be either unknown or
751 771 // ignored
752 772 self.options.list_unknown || self.options.list_ignored
753 773 };
754 774 if traverse_children {
755 775 let is_at_repo_root = false;
756 if let Ok(children_fs_entries) = self.read_dir(
757 &hg_path,
758 &fs_entry.fs_path,
759 is_at_repo_root,
760 ) {
776 if let Ok(children_fs_entries) =
777 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
778 {
761 779 children_fs_entries.par_iter().for_each(|child_fs_entry| {
762 780 self.traverse_fs_only(
763 781 is_ignored,
764 782 &hg_path,
765 783 child_fs_entry,
766 784 );
767 785 })
768 786 }
769 787 if self.options.collect_traversed_dirs {
770 788 self.outcome.lock().unwrap().traversed.push(hg_path.into())
771 789 }
772 790 }
773 791 is_ignored
774 792 } else {
775 793 if file_or_symlink {
776 794 if self.matcher.matches(&hg_path) {
777 795 self.mark_unknown_or_ignored(
778 796 has_ignored_ancestor,
779 797 &BorrowedPath::InMemory(&hg_path),
780 798 )
781 799 } else {
782 800 // We haven’t computed whether this path is ignored. It
783 801 // might not be, and a future run of status might have a
784 802 // different matcher that matches it. So treat it as not
785 803 // ignored. That is, inhibit readdir caching of the parent
786 804 // directory.
787 805 false
788 806 }
789 807 } else {
790 808 // This is neither a directory, a plain file, or a symlink.
791 809 // Treat it like an ignored file.
792 810 true
793 811 }
794 812 }
795 813 }
796 814
797 815 /// Returns whether that path is ignored
798 816 fn mark_unknown_or_ignored(
799 817 &self,
800 818 has_ignored_ancestor: bool,
801 819 hg_path: &BorrowedPath<'_, 'on_disk>,
802 820 ) -> bool {
803 821 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
804 822 if is_ignored {
805 823 if self.options.list_ignored {
806 824 self.push_outcome_without_copy_source(
807 825 Outcome::Ignored,
808 826 hg_path,
809 827 )
810 828 }
811 829 } else {
812 830 if self.options.list_unknown {
813 831 self.push_outcome_without_copy_source(
814 832 Outcome::Unknown,
815 833 hg_path,
816 834 )
817 835 }
818 836 }
819 837 is_ignored
820 838 }
821 839 }
822 840
823 struct DirEntry {
824 /// Path as stored in the dirstate
825 hg_path: HgPathBuf,
826 /// Filesystem path
827 fs_path: PathBuf,
828 metadata: std::fs::Metadata,
841 /// Since [`std::fs::FileType`] cannot be built directly, we emulate what we
842 /// care about.
843 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
844 enum FakeFileType {
845 File,
846 Directory,
847 Symlink,
829 848 }
830 849
831 impl DirEntry {
832 /// Returns **unsorted** entries in the given directory, with name and
833 /// metadata.
850 impl TryFrom<std::fs::FileType> for FakeFileType {
851 type Error = ();
852
853 fn try_from(f: std::fs::FileType) -> Result<Self, Self::Error> {
854 if f.is_dir() {
855 Ok(Self::Directory)
856 } else if f.is_file() {
857 Ok(Self::File)
858 } else if f.is_symlink() {
859 Ok(Self::Symlink)
860 } else {
861 // Things like FIFO etc.
862 Err(())
863 }
864 }
865 }
866
867 struct DirEntry<'a> {
868 /// Path as stored in the dirstate, or just the filename for optimization.
869 hg_path: HgPathCow<'a>,
870 /// Filesystem path
871 fs_path: Cow<'a, Path>,
872 /// Lazily computed
873 symlink_metadata: Option<std::fs::Metadata>,
874 /// Already computed for ergonomics.
875 file_type: FakeFileType,
876 }
877
878 impl<'a> DirEntry<'a> {
879 /// Returns **unsorted** entries in the given directory, with name and
880 /// file type. Metadata is not read here; it is queried lazily on demand.
834 881 ///
835 882 /// If a `.hg` sub-directory is encountered:
836 883 ///
837 884 /// * At the repository root, ignore that sub-directory
838 885 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
839 886 /// list instead.
840 887 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
841 888 // `read_dir` returns a "not found" error for the empty path
842 889 let at_cwd = path == Path::new("");
843 890 let read_dir_path = if at_cwd { Path::new(".") } else { path };
844 891 let mut results = Vec::new();
845 892 for entry in read_dir_path.read_dir()? {
846 893 let entry = entry?;
847 let metadata = match entry.metadata() {
894 let file_type = match entry.file_type() {
848 895 Ok(v) => v,
849 896 Err(e) => {
850 897 // race with file deletion?
851 898 if e.kind() == std::io::ErrorKind::NotFound {
852 899 continue;
853 900 } else {
854 901 return Err(e);
855 902 }
856 903 }
857 904 };
858 905 let file_name = entry.file_name();
859 906 // FIXME don't do this when cached
860 907 if file_name == ".hg" {
861 908 if is_at_repo_root {
862 909 // Skip the repo’s own .hg (might be a symlink)
863 910 continue;
864 } else if metadata.is_dir() {
911 } else if file_type.is_dir() {
865 912 // A .hg sub-directory at another location means a subrepo,
866 913 // skip it entirely.
867 914 return Ok(Vec::new());
868 915 }
869 916 }
870 917 let full_path = if at_cwd {
871 918 file_name.clone().into()
872 919 } else {
873 920 entry.path()
874 921 };
875 let base_name = get_bytes_from_os_string(file_name).into();
922 let filename =
923 Cow::Owned(get_bytes_from_os_string(file_name).into());
924 let file_type = match FakeFileType::try_from(file_type) {
925 Ok(file_type) => file_type,
926 Err(_) => continue,
927 };
876 928 results.push(DirEntry {
877 hg_path: base_name,
878 fs_path: full_path,
879 metadata,
929 hg_path: filename,
930 fs_path: Cow::Owned(full_path.to_path_buf()),
931 symlink_metadata: None,
932 file_type,
880 933 })
881 934 }
882 935 Ok(results)
883 936 }
937
938 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
939 match &self.symlink_metadata {
940 Some(meta) => Ok(meta.clone()),
941 None => std::fs::symlink_metadata(&self.fs_path),
942 }
943 }
944
945 fn is_dir(&self) -> bool {
946 self.file_type == FakeFileType::Directory
947 }
948
949 fn is_file(&self) -> bool {
950 self.file_type == FakeFileType::File
951 }
952
953 fn is_symlink(&self) -> bool {
954 self.file_type == FakeFileType::Symlink
955 }
884 956 }
885 957
886 958 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
887 959 /// of the given repository.
888 960 ///
889 961 /// This is similar to `SystemTime::now()`, with the result truncated to the
890 962 /// same time resolution as other files’ modification times. Using `.hg`
891 963 /// instead of the system’s default temporary directory (such as `/tmp`) makes
892 964 /// it more likely the temporary file is in the same disk partition as contents
893 965 /// of the working directory, which can matter since different filesystems may
894 966 /// store timestamps with different resolutions.
895 967 ///
896 968 /// This may fail, typically if we lack write permissions. In that case we
897 969 /// should continue the `status()` algorithm anyway and consider the current
898 970 /// date/time to be unknown.
899 971 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
900 972 tempfile::tempfile_in(repo_root.join(".hg"))?
901 973 .metadata()?
902 974 .modified()
903 975 }
General Comments 0
You need to be logged in to leave comments. Login now