##// END OF EJS Templates
rust-status: fix thread count ceiling...
Raphaël Gomès -
r51052:c5243582 stable
parent child Browse files
Show More
@@ -1,931 +1,931 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::Matcher;
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_bytes_from_path;
14 14 use crate::utils::files::get_path_from_bytes;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::BadMatch;
17 17 use crate::DirstateStatus;
18 18 use crate::HgPathBuf;
19 19 use crate::HgPathCow;
20 20 use crate::PatternFileWarning;
21 21 use crate::StatusError;
22 22 use crate::StatusOptions;
23 23 use micro_timer::timed;
24 24 use once_cell::sync::OnceCell;
25 25 use rayon::prelude::*;
26 26 use sha1::{Digest, Sha1};
27 27 use std::borrow::Cow;
28 28 use std::io;
29 29 use std::path::Path;
30 30 use std::path::PathBuf;
31 31 use std::sync::Mutex;
32 32 use std::time::SystemTime;
33 33
34 34 /// Returns the status of the working directory compared to its parent
35 35 /// changeset.
36 36 ///
37 37 /// This algorithm is based on traversing the filesystem tree (`fs` in function
38 38 /// and variable names) and dirstate tree at the same time. The core of this
39 39 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
40 40 /// and its use of `itertools::merge_join_by`. When reaching a path that only
41 41 /// exists in one of the two trees, depending on information requested by
42 42 /// `options` we may need to traverse the remaining subtree.
43 43 #[timed]
44 44 pub fn status<'dirstate>(
45 45 dmap: &'dirstate mut DirstateMap,
46 46 matcher: &(dyn Matcher + Sync),
47 47 root_dir: PathBuf,
48 48 ignore_files: Vec<PathBuf>,
49 49 options: StatusOptions,
50 50 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
51 51 {
52 52 // Force the global rayon threadpool to not exceed 16 concurrent threads.
53 53 // This is a stop-gap measure until we figure out why using more than 16
54 54 // threads makes `status` slower for each additional thread.
55 55 // We use `ok()` in case the global threadpool has already been
56 56 // instantiated in `rhg` or some other caller.
57 57 // TODO find the underlying cause and fix it, then remove this.
58 58 rayon::ThreadPoolBuilder::new()
59 .num_threads(16)
59 .num_threads(16.min(rayon::current_num_threads()))
60 60 .build_global()
61 61 .ok();
62 62
63 63 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
64 64 if options.list_ignored || options.list_unknown {
65 65 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
66 66 DirstateVersion::V1 => {
67 67 let (ignore_fn, warnings) = get_ignore_function(
68 68 ignore_files,
69 69 &root_dir,
70 70 &mut |_source, _pattern_bytes| {},
71 71 )?;
72 72 (ignore_fn, warnings, None)
73 73 }
74 74 DirstateVersion::V2 => {
75 75 let mut hasher = Sha1::new();
76 76 let (ignore_fn, warnings) = get_ignore_function(
77 77 ignore_files,
78 78 &root_dir,
79 79 &mut |source, pattern_bytes| {
80 80 // If inside the repo, use the relative version to
81 81 // make it deterministic inside tests.
82 82 // The performance hit should be negligible.
83 83 let source = source
84 84 .strip_prefix(&root_dir)
85 85 .unwrap_or(source);
86 86 let source = get_bytes_from_path(source);
87 87
88 88 let mut subhasher = Sha1::new();
89 89 subhasher.update(pattern_bytes);
90 90 let patterns_hash = subhasher.finalize();
91 91
92 92 hasher.update(source);
93 93 hasher.update(b" ");
94 94 hasher.update(patterns_hash);
95 95 hasher.update(b"\n");
96 96 },
97 97 )?;
98 98 let new_hash = *hasher.finalize().as_ref();
99 99 let changed = new_hash != dmap.ignore_patterns_hash;
100 100 dmap.ignore_patterns_hash = new_hash;
101 101 (ignore_fn, warnings, Some(changed))
102 102 }
103 103 };
104 104 (ignore_fn, warnings, changed)
105 105 } else {
106 106 (Box::new(|&_| true), vec![], None)
107 107 };
108 108
109 109 let filesystem_time_at_status_start =
110 110 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
111 111
112 112 // If the repository is under the current directory, prefer using a
113 113 // relative path, so the kernel needs to traverse fewer directory in every
114 114 // call to `read_dir` or `symlink_metadata`.
115 115 // This is effective in the common case where the current directory is the
116 116 // repository root.
117 117
118 118 // TODO: Better yet would be to use libc functions like `openat` and
119 119 // `fstatat` to remove such repeated traversals entirely, but the standard
120 120 // library does not provide APIs based on those.
121 121 // Maybe with a crate like https://crates.io/crates/openat instead?
122 122 let root_dir = if let Some(relative) = std::env::current_dir()
123 123 .ok()
124 124 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
125 125 {
126 126 relative
127 127 } else {
128 128 &root_dir
129 129 };
130 130
131 131 let outcome = DirstateStatus {
132 132 filesystem_time_at_status_start,
133 133 ..Default::default()
134 134 };
135 135 let common = StatusCommon {
136 136 dmap,
137 137 options,
138 138 matcher,
139 139 ignore_fn,
140 140 outcome: Mutex::new(outcome),
141 141 ignore_patterns_have_changed: patterns_changed,
142 142 new_cacheable_directories: Default::default(),
143 143 outdated_cached_directories: Default::default(),
144 144 filesystem_time_at_status_start,
145 145 };
146 146 let is_at_repo_root = true;
147 147 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
148 148 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
149 149 let root_cached_mtime = None;
150 150 let root_dir_metadata = None;
151 151 // If the path we have for the repository root is a symlink, do follow it.
152 152 // (As opposed to symlinks within the working directory which are not
153 153 // followed, using `std::fs::symlink_metadata`.)
154 154 common.traverse_fs_directory_and_dirstate(
155 155 &has_ignored_ancestor,
156 156 dmap.root.as_ref(),
157 157 hg_path,
158 158 &root_dir,
159 159 root_dir_metadata,
160 160 root_cached_mtime,
161 161 is_at_repo_root,
162 162 )?;
163 163 let mut outcome = common.outcome.into_inner().unwrap();
164 164 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
165 165 let outdated = common.outdated_cached_directories.into_inner().unwrap();
166 166
167 167 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
168 168 || !outdated.is_empty()
169 169 || (!new_cacheable.is_empty()
170 170 && dmap.dirstate_version == DirstateVersion::V2);
171 171
172 172 // Remove outdated mtimes before adding new mtimes, in case a given
173 173 // directory is both
174 174 for path in &outdated {
175 175 dmap.clear_cached_mtime(path)?;
176 176 }
177 177 for (path, mtime) in &new_cacheable {
178 178 dmap.set_cached_mtime(path, *mtime)?;
179 179 }
180 180
181 181 Ok((outcome, warnings))
182 182 }
183 183
184 184 /// Bag of random things needed by various parts of the algorithm. Reduces the
185 185 /// number of parameters passed to functions.
186 186 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
187 187 dmap: &'tree DirstateMap<'on_disk>,
188 188 options: StatusOptions,
189 189 matcher: &'a (dyn Matcher + Sync),
190 190 ignore_fn: IgnoreFnType<'a>,
191 191 outcome: Mutex<DirstateStatus<'on_disk>>,
192 192 /// New timestamps of directories to be used for caching their readdirs
193 193 new_cacheable_directories:
194 194 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
195 195 /// Used to invalidate the readdir cache of directories
196 196 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
197 197
198 198 /// Whether ignore files like `.hgignore` have changed since the previous
199 199 /// time a `status()` call wrote their hash to the dirstate. `None` means
200 200 /// we don’t know as this run doesn’t list either ignored or uknown files
201 201 /// and therefore isn’t reading `.hgignore`.
202 202 ignore_patterns_have_changed: Option<bool>,
203 203
204 204 /// The current time at the start of the `status()` algorithm, as measured
205 205 /// and possibly truncated by the filesystem.
206 206 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
207 207 }
208 208
/// Selects which list of the `DirstateStatus` result a path is appended to
/// (see `StatusCommon::push_outcome_common`).
enum Outcome {
    /// Goes to the `modified` list
    Modified,
    /// Goes to the `added` list
    Added,
    /// Goes to the `removed` list
    Removed,
    /// Goes to the `deleted` list
    Deleted,
    /// Goes to the `clean` list
    Clean,
    /// Goes to the `ignored` list
    Ignored,
    /// Goes to the `unknown` list
    Unknown,
    /// Goes to the `unsure` list
    Unsure,
}
219 219
220 220 /// Lazy computation of whether a given path has a hgignored
221 221 /// ancestor.
222 222 struct HasIgnoredAncestor<'a> {
223 223 /// `path` and `parent` constitute the inputs to the computation,
224 224 /// `cache` stores the outcome.
225 225 path: &'a HgPath,
226 226 parent: Option<&'a HasIgnoredAncestor<'a>>,
227 227 cache: OnceCell<bool>,
228 228 }
229 229
230 230 impl<'a> HasIgnoredAncestor<'a> {
231 231 fn create(
232 232 parent: Option<&'a HasIgnoredAncestor<'a>>,
233 233 path: &'a HgPath,
234 234 ) -> HasIgnoredAncestor<'a> {
235 235 Self {
236 236 path,
237 237 parent,
238 238 cache: OnceCell::new(),
239 239 }
240 240 }
241 241
242 242 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
243 243 match self.parent {
244 244 None => false,
245 245 Some(parent) => {
246 246 *(parent.cache.get_or_init(|| {
247 247 parent.force(ignore_fn) || ignore_fn(&self.path)
248 248 }))
249 249 }
250 250 }
251 251 }
252 252 }
253 253
254 254 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
255 255 fn push_outcome(
256 256 &self,
257 257 which: Outcome,
258 258 dirstate_node: &NodeRef<'tree, 'on_disk>,
259 259 ) -> Result<(), DirstateV2ParseError> {
260 260 let path = dirstate_node
261 261 .full_path_borrowed(self.dmap.on_disk)?
262 262 .detach_from_tree();
263 263 let copy_source = if self.options.list_copies {
264 264 dirstate_node
265 265 .copy_source_borrowed(self.dmap.on_disk)?
266 266 .map(|source| source.detach_from_tree())
267 267 } else {
268 268 None
269 269 };
270 270 self.push_outcome_common(which, path, copy_source);
271 271 Ok(())
272 272 }
273 273
274 274 fn push_outcome_without_copy_source(
275 275 &self,
276 276 which: Outcome,
277 277 path: &BorrowedPath<'_, 'on_disk>,
278 278 ) {
279 279 self.push_outcome_common(which, path.detach_from_tree(), None)
280 280 }
281 281
282 282 fn push_outcome_common(
283 283 &self,
284 284 which: Outcome,
285 285 path: HgPathCow<'on_disk>,
286 286 copy_source: Option<HgPathCow<'on_disk>>,
287 287 ) {
288 288 let mut outcome = self.outcome.lock().unwrap();
289 289 let vec = match which {
290 290 Outcome::Modified => &mut outcome.modified,
291 291 Outcome::Added => &mut outcome.added,
292 292 Outcome::Removed => &mut outcome.removed,
293 293 Outcome::Deleted => &mut outcome.deleted,
294 294 Outcome::Clean => &mut outcome.clean,
295 295 Outcome::Ignored => &mut outcome.ignored,
296 296 Outcome::Unknown => &mut outcome.unknown,
297 297 Outcome::Unsure => &mut outcome.unsure,
298 298 };
299 299 vec.push(StatusPath { path, copy_source });
300 300 }
301 301
302 302 fn read_dir(
303 303 &self,
304 304 hg_path: &HgPath,
305 305 fs_path: &Path,
306 306 is_at_repo_root: bool,
307 307 ) -> Result<Vec<DirEntry>, ()> {
308 308 DirEntry::read_dir(fs_path, is_at_repo_root)
309 309 .map_err(|error| self.io_error(error, hg_path))
310 310 }
311 311
312 312 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
313 313 let errno = error.raw_os_error().expect("expected real OS error");
314 314 self.outcome
315 315 .lock()
316 316 .unwrap()
317 317 .bad
318 318 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
319 319 }
320 320
321 321 fn check_for_outdated_directory_cache(
322 322 &self,
323 323 dirstate_node: &NodeRef<'tree, 'on_disk>,
324 324 ) -> Result<bool, DirstateV2ParseError> {
325 325 if self.ignore_patterns_have_changed == Some(true)
326 326 && dirstate_node.cached_directory_mtime()?.is_some()
327 327 {
328 328 self.outdated_cached_directories.lock().unwrap().push(
329 329 dirstate_node
330 330 .full_path_borrowed(self.dmap.on_disk)?
331 331 .detach_from_tree(),
332 332 );
333 333 return Ok(true);
334 334 }
335 335 Ok(false)
336 336 }
337 337
338 338 /// If this returns true, we can get accurate results by only using
339 339 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
340 340 /// need to call `read_dir`.
341 341 fn can_skip_fs_readdir(
342 342 &self,
343 343 directory_metadata: Option<&std::fs::Metadata>,
344 344 cached_directory_mtime: Option<TruncatedTimestamp>,
345 345 ) -> bool {
346 346 if !self.options.list_unknown && !self.options.list_ignored {
347 347 // All states that we care about listing have corresponding
348 348 // dirstate entries.
349 349 // This happens for example with `hg status -mard`.
350 350 return true;
351 351 }
352 352 if !self.options.list_ignored
353 353 && self.ignore_patterns_have_changed == Some(false)
354 354 {
355 355 if let Some(cached_mtime) = cached_directory_mtime {
356 356 // The dirstate contains a cached mtime for this directory, set
357 357 // by a previous run of the `status` algorithm which found this
358 358 // directory eligible for `read_dir` caching.
359 359 if let Some(meta) = directory_metadata {
360 360 if cached_mtime
361 361 .likely_equal_to_mtime_of(meta)
362 362 .unwrap_or(false)
363 363 {
364 364 // The mtime of that directory has not changed
365 365 // since then, which means that the results of
366 366 // `read_dir` should also be unchanged.
367 367 return true;
368 368 }
369 369 }
370 370 }
371 371 }
372 372 false
373 373 }
374 374
375 375 /// Returns whether all child entries of the filesystem directory have a
376 376 /// corresponding dirstate node or are ignored.
377 377 fn traverse_fs_directory_and_dirstate<'ancestor>(
378 378 &self,
379 379 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
380 380 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
381 381 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
382 382 directory_fs_path: &Path,
383 383 directory_metadata: Option<&std::fs::Metadata>,
384 384 cached_directory_mtime: Option<TruncatedTimestamp>,
385 385 is_at_repo_root: bool,
386 386 ) -> Result<bool, DirstateV2ParseError> {
387 387 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
388 388 {
389 389 dirstate_nodes
390 390 .par_iter()
391 391 .map(|dirstate_node| {
392 392 let fs_path = directory_fs_path.join(get_path_from_bytes(
393 393 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
394 394 ));
395 395 match std::fs::symlink_metadata(&fs_path) {
396 396 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
397 397 &fs_path,
398 398 &fs_metadata,
399 399 dirstate_node,
400 400 has_ignored_ancestor,
401 401 ),
402 402 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
403 403 self.traverse_dirstate_only(dirstate_node)
404 404 }
405 405 Err(error) => {
406 406 let hg_path =
407 407 dirstate_node.full_path(self.dmap.on_disk)?;
408 408 Ok(self.io_error(error, hg_path))
409 409 }
410 410 }
411 411 })
412 412 .collect::<Result<_, _>>()?;
413 413
414 414 // We don’t know, so conservatively say this isn’t the case
415 415 let children_all_have_dirstate_node_or_are_ignored = false;
416 416
417 417 return Ok(children_all_have_dirstate_node_or_are_ignored);
418 418 }
419 419
420 420 let mut fs_entries = if let Ok(entries) = self.read_dir(
421 421 directory_hg_path,
422 422 directory_fs_path,
423 423 is_at_repo_root,
424 424 ) {
425 425 entries
426 426 } else {
427 427 // Treat an unreadable directory (typically because of insufficient
428 428 // permissions) like an empty directory. `self.read_dir` has
429 429 // already called `self.io_error` so a warning will be emitted.
430 430 Vec::new()
431 431 };
432 432
433 433 // `merge_join_by` requires both its input iterators to be sorted:
434 434
435 435 let dirstate_nodes = dirstate_nodes.sorted();
436 436 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
437 437 // https://github.com/rust-lang/rust/issues/34162
438 438 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
439 439
440 440 // Propagate here any error that would happen inside the comparison
441 441 // callback below
442 442 for dirstate_node in &dirstate_nodes {
443 443 dirstate_node.base_name(self.dmap.on_disk)?;
444 444 }
445 445 itertools::merge_join_by(
446 446 dirstate_nodes,
447 447 &fs_entries,
448 448 |dirstate_node, fs_entry| {
449 449 // This `unwrap` never panics because we already propagated
450 450 // those errors above
451 451 dirstate_node
452 452 .base_name(self.dmap.on_disk)
453 453 .unwrap()
454 454 .cmp(&fs_entry.base_name)
455 455 },
456 456 )
457 457 .par_bridge()
458 458 .map(|pair| {
459 459 use itertools::EitherOrBoth::*;
460 460 let has_dirstate_node_or_is_ignored;
461 461 match pair {
462 462 Both(dirstate_node, fs_entry) => {
463 463 self.traverse_fs_and_dirstate(
464 464 &fs_entry.full_path,
465 465 &fs_entry.metadata,
466 466 dirstate_node,
467 467 has_ignored_ancestor,
468 468 )?;
469 469 has_dirstate_node_or_is_ignored = true
470 470 }
471 471 Left(dirstate_node) => {
472 472 self.traverse_dirstate_only(dirstate_node)?;
473 473 has_dirstate_node_or_is_ignored = true;
474 474 }
475 475 Right(fs_entry) => {
476 476 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
477 477 has_ignored_ancestor.force(&self.ignore_fn),
478 478 directory_hg_path,
479 479 fs_entry,
480 480 )
481 481 }
482 482 }
483 483 Ok(has_dirstate_node_or_is_ignored)
484 484 })
485 485 .try_reduce(|| true, |a, b| Ok(a && b))
486 486 }
487 487
488 488 fn traverse_fs_and_dirstate<'ancestor>(
489 489 &self,
490 490 fs_path: &Path,
491 491 fs_metadata: &std::fs::Metadata,
492 492 dirstate_node: NodeRef<'tree, 'on_disk>,
493 493 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
494 494 ) -> Result<(), DirstateV2ParseError> {
495 495 let outdated_dircache =
496 496 self.check_for_outdated_directory_cache(&dirstate_node)?;
497 497 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
498 498 let file_type = fs_metadata.file_type();
499 499 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
500 500 if !file_or_symlink {
501 501 // If we previously had a file here, it was removed (with
502 502 // `hg rm` or similar) or deleted before it could be
503 503 // replaced by a directory or something else.
504 504 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
505 505 }
506 506 if file_type.is_dir() {
507 507 if self.options.collect_traversed_dirs {
508 508 self.outcome
509 509 .lock()
510 510 .unwrap()
511 511 .traversed
512 512 .push(hg_path.detach_from_tree())
513 513 }
514 514 let is_ignored = HasIgnoredAncestor::create(
515 515 Some(&has_ignored_ancestor),
516 516 hg_path,
517 517 );
518 518 let is_at_repo_root = false;
519 519 let children_all_have_dirstate_node_or_are_ignored = self
520 520 .traverse_fs_directory_and_dirstate(
521 521 &is_ignored,
522 522 dirstate_node.children(self.dmap.on_disk)?,
523 523 hg_path,
524 524 fs_path,
525 525 Some(fs_metadata),
526 526 dirstate_node.cached_directory_mtime()?,
527 527 is_at_repo_root,
528 528 )?;
529 529 self.maybe_save_directory_mtime(
530 530 children_all_have_dirstate_node_or_are_ignored,
531 531 fs_metadata,
532 532 dirstate_node,
533 533 outdated_dircache,
534 534 )?
535 535 } else {
536 536 if file_or_symlink && self.matcher.matches(&hg_path) {
537 537 if let Some(entry) = dirstate_node.entry()? {
538 538 if !entry.any_tracked() {
539 539 // Forward-compat if we start tracking unknown/ignored
540 540 // files for caching reasons
541 541 self.mark_unknown_or_ignored(
542 542 has_ignored_ancestor.force(&self.ignore_fn),
543 543 &hg_path,
544 544 );
545 545 }
546 546 if entry.added() {
547 547 self.push_outcome(Outcome::Added, &dirstate_node)?;
548 548 } else if entry.removed() {
549 549 self.push_outcome(Outcome::Removed, &dirstate_node)?;
550 550 } else if entry.modified() {
551 551 self.push_outcome(Outcome::Modified, &dirstate_node)?;
552 552 } else {
553 553 self.handle_normal_file(&dirstate_node, fs_metadata)?;
554 554 }
555 555 } else {
556 556 // `node.entry.is_none()` indicates a "directory"
557 557 // node, but the filesystem has a file
558 558 self.mark_unknown_or_ignored(
559 559 has_ignored_ancestor.force(&self.ignore_fn),
560 560 hg_path,
561 561 );
562 562 }
563 563 }
564 564
565 565 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
566 566 {
567 567 self.traverse_dirstate_only(child_node)?
568 568 }
569 569 }
570 570 Ok(())
571 571 }
572 572
573 573 /// Save directory mtime if applicable.
574 574 ///
575 575 /// `outdated_directory_cache` is `true` if we've just invalidated the
576 576 /// cache for this directory in `check_for_outdated_directory_cache`,
577 577 /// which forces the update.
578 578 fn maybe_save_directory_mtime(
579 579 &self,
580 580 children_all_have_dirstate_node_or_are_ignored: bool,
581 581 directory_metadata: &std::fs::Metadata,
582 582 dirstate_node: NodeRef<'tree, 'on_disk>,
583 583 outdated_directory_cache: bool,
584 584 ) -> Result<(), DirstateV2ParseError> {
585 585 if !children_all_have_dirstate_node_or_are_ignored {
586 586 return Ok(());
587 587 }
588 588 // All filesystem directory entries from `read_dir` have a
589 589 // corresponding node in the dirstate, so we can reconstitute the
590 590 // names of those entries without calling `read_dir` again.
591 591
592 592 // TODO: use let-else here and below when available:
593 593 // https://github.com/rust-lang/rust/issues/87335
594 594 let status_start = if let Some(status_start) =
595 595 &self.filesystem_time_at_status_start
596 596 {
597 597 status_start
598 598 } else {
599 599 return Ok(());
600 600 };
601 601
602 602 // Although the Rust standard library’s `SystemTime` type
603 603 // has nanosecond precision, the times reported for a
604 604 // directory’s (or file’s) modified time may have lower
605 605 // resolution based on the filesystem (for example ext3
606 606 // only stores integer seconds), kernel (see
607 607 // https://stackoverflow.com/a/14393315/1162888), etc.
608 608 let directory_mtime = if let Ok(option) =
609 609 TruncatedTimestamp::for_reliable_mtime_of(
610 610 directory_metadata,
611 611 status_start,
612 612 ) {
613 613 if let Some(directory_mtime) = option {
614 614 directory_mtime
615 615 } else {
616 616 // The directory was modified too recently,
617 617 // don’t cache its `read_dir` results.
618 618 //
619 619 // 1. A change to this directory (direct child was
620 620 // added or removed) cause its mtime to be set
621 621 // (possibly truncated) to `directory_mtime`
622 622 // 2. This `status` algorithm calls `read_dir`
623 623 // 3. An other change is made to the same directory is
624 624 // made so that calling `read_dir` agin would give
625 625 // different results, but soon enough after 1. that
626 626 // the mtime stays the same
627 627 //
628 628 // On a system where the time resolution poor, this
629 629 // scenario is not unlikely if all three steps are caused
630 630 // by the same script.
631 631 return Ok(());
632 632 }
633 633 } else {
634 634 // OS/libc does not support mtime?
635 635 return Ok(());
636 636 };
637 637 // We’ve observed (through `status_start`) that time has
638 638 // “progressed” since `directory_mtime`, so any further
639 639 // change to this directory is extremely likely to cause a
640 640 // different mtime.
641 641 //
642 642 // Having the same mtime again is not entirely impossible
643 643 // since the system clock is not monotonous. It could jump
644 644 // backward to some point before `directory_mtime`, then a
645 645 // directory change could potentially happen during exactly
646 646 // the wrong tick.
647 647 //
648 648 // We deem this scenario (unlike the previous one) to be
649 649 // unlikely enough in practice.
650 650
651 651 let is_up_to_date = if let Some(cached) =
652 652 dirstate_node.cached_directory_mtime()?
653 653 {
654 654 !outdated_directory_cache && cached.likely_equal(directory_mtime)
655 655 } else {
656 656 false
657 657 };
658 658 if !is_up_to_date {
659 659 let hg_path = dirstate_node
660 660 .full_path_borrowed(self.dmap.on_disk)?
661 661 .detach_from_tree();
662 662 self.new_cacheable_directories
663 663 .lock()
664 664 .unwrap()
665 665 .push((hg_path, directory_mtime))
666 666 }
667 667 Ok(())
668 668 }
669 669
670 670 /// A file that is clean in the dirstate was found in the filesystem
671 671 fn handle_normal_file(
672 672 &self,
673 673 dirstate_node: &NodeRef<'tree, 'on_disk>,
674 674 fs_metadata: &std::fs::Metadata,
675 675 ) -> Result<(), DirstateV2ParseError> {
676 676 // Keep the low 31 bits
677 677 fn truncate_u64(value: u64) -> i32 {
678 678 (value & 0x7FFF_FFFF) as i32
679 679 }
680 680
681 681 let entry = dirstate_node
682 682 .entry()?
683 683 .expect("handle_normal_file called with entry-less node");
684 684 let mode_changed =
685 685 || self.options.check_exec && entry.mode_changed(fs_metadata);
686 686 let size = entry.size();
687 687 let size_changed = size != truncate_u64(fs_metadata.len());
688 688 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
689 689 // issue6456: Size returned may be longer due to encryption
690 690 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
691 691 self.push_outcome(Outcome::Unsure, dirstate_node)?
692 692 } else if dirstate_node.has_copy_source()
693 693 || entry.is_from_other_parent()
694 694 || (size >= 0 && (size_changed || mode_changed()))
695 695 {
696 696 self.push_outcome(Outcome::Modified, dirstate_node)?
697 697 } else {
698 698 let mtime_looks_clean;
699 699 if let Some(dirstate_mtime) = entry.truncated_mtime() {
700 700 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
701 701 .expect("OS/libc does not support mtime?");
702 702 // There might be a change in the future if for example the
703 703 // internal clock become off while process run, but this is a
704 704 // case where the issues the user would face
705 705 // would be a lot worse and there is nothing we
706 706 // can really do.
707 707 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
708 708 } else {
709 709 // No mtime in the dirstate entry
710 710 mtime_looks_clean = false
711 711 };
712 712 if !mtime_looks_clean {
713 713 self.push_outcome(Outcome::Unsure, dirstate_node)?
714 714 } else if self.options.list_clean {
715 715 self.push_outcome(Outcome::Clean, dirstate_node)?
716 716 }
717 717 }
718 718 Ok(())
719 719 }
720 720
721 721 /// A node in the dirstate tree has no corresponding filesystem entry
722 722 fn traverse_dirstate_only(
723 723 &self,
724 724 dirstate_node: NodeRef<'tree, 'on_disk>,
725 725 ) -> Result<(), DirstateV2ParseError> {
726 726 self.check_for_outdated_directory_cache(&dirstate_node)?;
727 727 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
728 728 dirstate_node
729 729 .children(self.dmap.on_disk)?
730 730 .par_iter()
731 731 .map(|child_node| self.traverse_dirstate_only(child_node))
732 732 .collect()
733 733 }
734 734
735 735 /// A node in the dirstate tree has no corresponding *file* on the
736 736 /// filesystem
737 737 ///
738 738 /// Does nothing on a "directory" node
739 739 fn mark_removed_or_deleted_if_file(
740 740 &self,
741 741 dirstate_node: &NodeRef<'tree, 'on_disk>,
742 742 ) -> Result<(), DirstateV2ParseError> {
743 743 if let Some(entry) = dirstate_node.entry()? {
744 744 if !entry.any_tracked() {
745 745 // Future-compat for when we start storing ignored and unknown
746 746 // files for caching reasons
747 747 return Ok(());
748 748 }
749 749 let path = dirstate_node.full_path(self.dmap.on_disk)?;
750 750 if self.matcher.matches(path) {
751 751 if entry.removed() {
752 752 self.push_outcome(Outcome::Removed, dirstate_node)?
753 753 } else {
754 754 self.push_outcome(Outcome::Deleted, &dirstate_node)?
755 755 }
756 756 }
757 757 }
758 758 Ok(())
759 759 }
760 760
761 761 /// Something in the filesystem has no corresponding dirstate node
762 762 ///
763 763 /// Returns whether that path is ignored
764 764 fn traverse_fs_only(
765 765 &self,
766 766 has_ignored_ancestor: bool,
767 767 directory_hg_path: &HgPath,
768 768 fs_entry: &DirEntry,
769 769 ) -> bool {
770 770 let hg_path = directory_hg_path.join(&fs_entry.base_name);
771 771 let file_type = fs_entry.metadata.file_type();
772 772 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
773 773 if file_type.is_dir() {
774 774 let is_ignored =
775 775 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
776 776 let traverse_children = if is_ignored {
777 777 // Descendants of an ignored directory are all ignored
778 778 self.options.list_ignored
779 779 } else {
780 780 // Descendants of an unknown directory may be either unknown or
781 781 // ignored
782 782 self.options.list_unknown || self.options.list_ignored
783 783 };
784 784 if traverse_children {
785 785 let is_at_repo_root = false;
786 786 if let Ok(children_fs_entries) = self.read_dir(
787 787 &hg_path,
788 788 &fs_entry.full_path,
789 789 is_at_repo_root,
790 790 ) {
791 791 children_fs_entries.par_iter().for_each(|child_fs_entry| {
792 792 self.traverse_fs_only(
793 793 is_ignored,
794 794 &hg_path,
795 795 child_fs_entry,
796 796 );
797 797 })
798 798 }
799 799 if self.options.collect_traversed_dirs {
800 800 self.outcome.lock().unwrap().traversed.push(hg_path.into())
801 801 }
802 802 }
803 803 is_ignored
804 804 } else {
805 805 if file_or_symlink {
806 806 if self.matcher.matches(&hg_path) {
807 807 self.mark_unknown_or_ignored(
808 808 has_ignored_ancestor,
809 809 &BorrowedPath::InMemory(&hg_path),
810 810 )
811 811 } else {
812 812 // We haven’t computed whether this path is ignored. It
813 813 // might not be, and a future run of status might have a
814 814 // different matcher that matches it. So treat it as not
815 815 // ignored. That is, inhibit readdir caching of the parent
816 816 // directory.
817 817 false
818 818 }
819 819 } else {
820 820 // This is neither a directory, a plain file, or a symlink.
821 821 // Treat it like an ignored file.
822 822 true
823 823 }
824 824 }
825 825 }
826 826
827 827 /// Returns whether that path is ignored
828 828 fn mark_unknown_or_ignored(
829 829 &self,
830 830 has_ignored_ancestor: bool,
831 831 hg_path: &BorrowedPath<'_, 'on_disk>,
832 832 ) -> bool {
833 833 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
834 834 if is_ignored {
835 835 if self.options.list_ignored {
836 836 self.push_outcome_without_copy_source(
837 837 Outcome::Ignored,
838 838 hg_path,
839 839 )
840 840 }
841 841 } else {
842 842 if self.options.list_unknown {
843 843 self.push_outcome_without_copy_source(
844 844 Outcome::Unknown,
845 845 hg_path,
846 846 )
847 847 }
848 848 }
849 849 is_ignored
850 850 }
851 851 }
852 852
853 853 struct DirEntry {
854 854 base_name: HgPathBuf,
855 855 full_path: PathBuf,
856 856 metadata: std::fs::Metadata,
857 857 }
858 858
859 859 impl DirEntry {
860 860 /// Returns **unsorted** entries in the given directory, with name and
861 861 /// metadata.
862 862 ///
863 863 /// If a `.hg` sub-directory is encountered:
864 864 ///
865 865 /// * At the repository root, ignore that sub-directory
866 866 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
867 867 /// list instead.
868 868 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
869 869 // `read_dir` returns a "not found" error for the empty path
870 870 let at_cwd = path == Path::new("");
871 871 let read_dir_path = if at_cwd { Path::new(".") } else { path };
872 872 let mut results = Vec::new();
873 873 for entry in read_dir_path.read_dir()? {
874 874 let entry = entry?;
875 875 let metadata = match entry.metadata() {
876 876 Ok(v) => v,
877 877 Err(e) => {
878 878 // race with file deletion?
879 879 if e.kind() == std::io::ErrorKind::NotFound {
880 880 continue;
881 881 } else {
882 882 return Err(e);
883 883 }
884 884 }
885 885 };
886 886 let file_name = entry.file_name();
887 887 // FIXME don't do this when cached
888 888 if file_name == ".hg" {
889 889 if is_at_repo_root {
890 890 // Skip the repo’s own .hg (might be a symlink)
891 891 continue;
892 892 } else if metadata.is_dir() {
893 893 // A .hg sub-directory at another location means a subrepo,
894 894 // skip it entirely.
895 895 return Ok(Vec::new());
896 896 }
897 897 }
898 898 let full_path = if at_cwd {
899 899 file_name.clone().into()
900 900 } else {
901 901 entry.path()
902 902 };
903 903 let base_name = get_bytes_from_os_string(file_name).into();
904 904 results.push(DirEntry {
905 905 base_name,
906 906 full_path,
907 907 metadata,
908 908 })
909 909 }
910 910 Ok(results)
911 911 }
912 912 }
913 913
914 914 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
915 915 /// of the give repository.
916 916 ///
917 917 /// This is similar to `SystemTime::now()`, with the result truncated to the
918 918 /// same time resolution as other files’ modification times. Using `.hg`
919 919 /// instead of the system’s default temporary directory (such as `/tmp`) makes
920 920 /// it more likely the temporary file is in the same disk partition as contents
921 921 /// of the working directory, which can matter since different filesystems may
922 922 /// store timestamps with different resolutions.
923 923 ///
924 924 /// This may fail, typically if we lack write permissions. In that case we
925 925 /// should continue the `status()` algoritm anyway and consider the current
926 926 /// date/time to be unknown.
927 927 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
928 928 tempfile::tempfile_in(repo_root.join(".hg"))?
929 929 .metadata()?
930 930 .modified()
931 931 }
General Comments 0
You need to be logged in to leave comments. Login now