##// END OF EJS Templates
rust-status: cap the number of concurrent threads to 16...
Raphaël Gomès -
r49828:f8025bfc stable draft
parent child Browse files
Show More
@@ -1,837 +1,848 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::NodeData;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::Matcher;
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_path_from_bytes;
14 14 use crate::utils::hg_path::HgPath;
15 15 use crate::BadMatch;
16 16 use crate::DirstateStatus;
17 17 use crate::EntryState;
18 18 use crate::HgPathBuf;
19 19 use crate::HgPathCow;
20 20 use crate::PatternFileWarning;
21 21 use crate::StatusError;
22 22 use crate::StatusOptions;
23 23 use micro_timer::timed;
24 24 use rayon::prelude::*;
25 25 use sha1::{Digest, Sha1};
26 26 use std::borrow::Cow;
27 27 use std::io;
28 28 use std::path::Path;
29 29 use std::path::PathBuf;
30 30 use std::sync::Mutex;
31 31 use std::time::SystemTime;
32 32
33 33 /// Returns the status of the working directory compared to its parent
34 34 /// changeset.
35 35 ///
36 36 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 37 /// and variable names) and dirstate tree at the same time. The core of this
38 38 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 39 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 40 /// exists in one of the two trees, depending on information requested by
41 41 /// `options` we may need to traverse the remaining subtree.
42 42 #[timed]
43 43 pub fn status<'tree, 'on_disk: 'tree>(
44 44 dmap: &'tree mut DirstateMap<'on_disk>,
45 45 matcher: &(dyn Matcher + Sync),
46 46 root_dir: PathBuf,
47 47 ignore_files: Vec<PathBuf>,
48 48 options: StatusOptions,
49 49 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
50 // Force the global rayon threadpool to not exceed 16 concurrent threads.
51 // This is a stop-gap measure until we figure out why using more than 16
52 // threads makes `status` slower for each additional thread.
53 // We use `ok()` in case the global threadpool has already been instantiated
54 // in `rhg` or some other caller.
55 // TODO find the underlying cause and fix it, then remove this.
56 rayon::ThreadPoolBuilder::new()
57 .num_threads(16)
58 .build_global()
59 .ok();
60
50 61 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
51 62 if options.list_ignored || options.list_unknown {
52 63 let mut hasher = Sha1::new();
53 64 let (ignore_fn, warnings) = get_ignore_function(
54 65 ignore_files,
55 66 &root_dir,
56 67 &mut |pattern_bytes| hasher.update(pattern_bytes),
57 68 )?;
58 69 let new_hash = *hasher.finalize().as_ref();
59 70 let changed = new_hash != dmap.ignore_patterns_hash;
60 71 dmap.ignore_patterns_hash = new_hash;
61 72 (ignore_fn, warnings, Some(changed))
62 73 } else {
63 74 (Box::new(|&_| true), vec![], None)
64 75 };
65 76
66 77 let filesystem_time_at_status_start =
67 78 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
68 79
69 80 // If the repository is under the current directory, prefer using a
70 81 // relative path, so the kernel needs to traverse fewer directory in every
71 82 // call to `read_dir` or `symlink_metadata`.
72 83 // This is effective in the common case where the current directory is the
73 84 // repository root.
74 85
75 86 // TODO: Better yet would be to use libc functions like `openat` and
76 87 // `fstatat` to remove such repeated traversals entirely, but the standard
77 88 // library does not provide APIs based on those.
78 89 // Maybe with a crate like https://crates.io/crates/openat instead?
79 90 let root_dir = if let Some(relative) = std::env::current_dir()
80 91 .ok()
81 92 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
82 93 {
83 94 relative
84 95 } else {
85 96 &root_dir
86 97 };
87 98
88 99 let outcome = DirstateStatus {
89 100 filesystem_time_at_status_start,
90 101 ..Default::default()
91 102 };
92 103 let common = StatusCommon {
93 104 dmap,
94 105 options,
95 106 matcher,
96 107 ignore_fn,
97 108 outcome: Mutex::new(outcome),
98 109 ignore_patterns_have_changed: patterns_changed,
99 110 new_cachable_directories: Default::default(),
100 111 outated_cached_directories: Default::default(),
101 112 filesystem_time_at_status_start,
102 113 };
103 114 let is_at_repo_root = true;
104 115 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
105 116 let has_ignored_ancestor = false;
106 117 let root_cached_mtime = None;
107 118 let root_dir_metadata = None;
108 119 // If the path we have for the repository root is a symlink, do follow it.
109 120 // (As opposed to symlinks within the working directory which are not
110 121 // followed, using `std::fs::symlink_metadata`.)
111 122 common.traverse_fs_directory_and_dirstate(
112 123 has_ignored_ancestor,
113 124 dmap.root.as_ref(),
114 125 hg_path,
115 126 &root_dir,
116 127 root_dir_metadata,
117 128 root_cached_mtime,
118 129 is_at_repo_root,
119 130 )?;
120 131 let mut outcome = common.outcome.into_inner().unwrap();
121 132 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
122 133 let outdated = common.outated_cached_directories.into_inner().unwrap();
123 134
124 135 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
125 136 || !outdated.is_empty()
126 137 || !new_cachable.is_empty();
127 138
128 139 // Remove outdated mtimes before adding new mtimes, in case a given
129 140 // directory is both
130 141 for path in &outdated {
131 142 let node = dmap.get_or_insert(path)?;
132 143 if let NodeData::CachedDirectory { .. } = &node.data {
133 144 node.data = NodeData::None
134 145 }
135 146 }
136 147 for (path, mtime) in &new_cachable {
137 148 let node = dmap.get_or_insert(path)?;
138 149 match &node.data {
139 150 NodeData::Entry(_) => {} // Don’t overwrite an entry
140 151 NodeData::CachedDirectory { .. } | NodeData::None => {
141 152 node.data = NodeData::CachedDirectory { mtime: *mtime }
142 153 }
143 154 }
144 155 }
145 156
146 157 Ok((outcome, warnings))
147 158 }
148 159
149 160 /// Bag of random things needed by various parts of the algorithm. Reduces the
150 161 /// number of parameters passed to functions.
151 162 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
152 163 dmap: &'tree DirstateMap<'on_disk>,
153 164 options: StatusOptions,
154 165 matcher: &'a (dyn Matcher + Sync),
155 166 ignore_fn: IgnoreFnType<'a>,
156 167 outcome: Mutex<DirstateStatus<'on_disk>>,
157 168 new_cachable_directories:
158 169 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
159 170 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
160 171
161 172 /// Whether ignore files like `.hgignore` have changed since the previous
162 173 /// time a `status()` call wrote their hash to the dirstate. `None` means
163 174 /// we don’t know as this run doesn’t list either ignored or uknown files
164 175 /// and therefore isn’t reading `.hgignore`.
165 176 ignore_patterns_have_changed: Option<bool>,
166 177
167 178 /// The current time at the start of the `status()` algorithm, as measured
168 179 /// and possibly truncated by the filesystem.
169 180 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
170 181 }
171 182
/// The category a path is reported under.
///
/// Each variant selects the list of the same name in `DirstateStatus` (see
/// `push_outcome_common`).
enum Outcome {
    Modified,
    Added,
    Removed,
    Deleted,
    Clean,
    Ignored,
    Unknown,
    Unsure,
}
182 193
183 194 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
184 195 fn push_outcome(
185 196 &self,
186 197 which: Outcome,
187 198 dirstate_node: &NodeRef<'tree, 'on_disk>,
188 199 ) -> Result<(), DirstateV2ParseError> {
189 200 let path = dirstate_node
190 201 .full_path_borrowed(self.dmap.on_disk)?
191 202 .detach_from_tree();
192 203 let copy_source = if self.options.list_copies {
193 204 dirstate_node
194 205 .copy_source_borrowed(self.dmap.on_disk)?
195 206 .map(|source| source.detach_from_tree())
196 207 } else {
197 208 None
198 209 };
199 210 self.push_outcome_common(which, path, copy_source);
200 211 Ok(())
201 212 }
202 213
203 214 fn push_outcome_without_copy_source(
204 215 &self,
205 216 which: Outcome,
206 217 path: &BorrowedPath<'_, 'on_disk>,
207 218 ) {
208 219 self.push_outcome_common(which, path.detach_from_tree(), None)
209 220 }
210 221
211 222 fn push_outcome_common(
212 223 &self,
213 224 which: Outcome,
214 225 path: HgPathCow<'on_disk>,
215 226 copy_source: Option<HgPathCow<'on_disk>>,
216 227 ) {
217 228 let mut outcome = self.outcome.lock().unwrap();
218 229 let vec = match which {
219 230 Outcome::Modified => &mut outcome.modified,
220 231 Outcome::Added => &mut outcome.added,
221 232 Outcome::Removed => &mut outcome.removed,
222 233 Outcome::Deleted => &mut outcome.deleted,
223 234 Outcome::Clean => &mut outcome.clean,
224 235 Outcome::Ignored => &mut outcome.ignored,
225 236 Outcome::Unknown => &mut outcome.unknown,
226 237 Outcome::Unsure => &mut outcome.unsure,
227 238 };
228 239 vec.push(StatusPath { path, copy_source });
229 240 }
230 241
231 242 fn read_dir(
232 243 &self,
233 244 hg_path: &HgPath,
234 245 fs_path: &Path,
235 246 is_at_repo_root: bool,
236 247 ) -> Result<Vec<DirEntry>, ()> {
237 248 DirEntry::read_dir(fs_path, is_at_repo_root)
238 249 .map_err(|error| self.io_error(error, hg_path))
239 250 }
240 251
241 252 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
242 253 let errno = error.raw_os_error().expect("expected real OS error");
243 254 self.outcome
244 255 .lock()
245 256 .unwrap()
246 257 .bad
247 258 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
248 259 }
249 260
250 261 fn check_for_outdated_directory_cache(
251 262 &self,
252 263 dirstate_node: &NodeRef<'tree, 'on_disk>,
253 264 ) -> Result<(), DirstateV2ParseError> {
254 265 if self.ignore_patterns_have_changed == Some(true)
255 266 && dirstate_node.cached_directory_mtime()?.is_some()
256 267 {
257 268 self.outated_cached_directories.lock().unwrap().push(
258 269 dirstate_node
259 270 .full_path_borrowed(self.dmap.on_disk)?
260 271 .detach_from_tree(),
261 272 )
262 273 }
263 274 Ok(())
264 275 }
265 276
266 277 /// If this returns true, we can get accurate results by only using
267 278 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
268 279 /// need to call `read_dir`.
269 280 fn can_skip_fs_readdir(
270 281 &self,
271 282 directory_metadata: Option<&std::fs::Metadata>,
272 283 cached_directory_mtime: Option<TruncatedTimestamp>,
273 284 ) -> bool {
274 285 if !self.options.list_unknown && !self.options.list_ignored {
275 286 // All states that we care about listing have corresponding
276 287 // dirstate entries.
277 288 // This happens for example with `hg status -mard`.
278 289 return true;
279 290 }
280 291 if !self.options.list_ignored
281 292 && self.ignore_patterns_have_changed == Some(false)
282 293 {
283 294 if let Some(cached_mtime) = cached_directory_mtime {
284 295 // The dirstate contains a cached mtime for this directory, set
285 296 // by a previous run of the `status` algorithm which found this
286 297 // directory eligible for `read_dir` caching.
287 298 if let Some(meta) = directory_metadata {
288 299 if cached_mtime
289 300 .likely_equal_to_mtime_of(meta)
290 301 .unwrap_or(false)
291 302 {
292 303 // The mtime of that directory has not changed
293 304 // since then, which means that the results of
294 305 // `read_dir` should also be unchanged.
295 306 return true;
296 307 }
297 308 }
298 309 }
299 310 }
300 311 false
301 312 }
302 313
303 314 /// Returns whether all child entries of the filesystem directory have a
304 315 /// corresponding dirstate node or are ignored.
305 316 fn traverse_fs_directory_and_dirstate(
306 317 &self,
307 318 has_ignored_ancestor: bool,
308 319 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
309 320 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
310 321 directory_fs_path: &Path,
311 322 directory_metadata: Option<&std::fs::Metadata>,
312 323 cached_directory_mtime: Option<TruncatedTimestamp>,
313 324 is_at_repo_root: bool,
314 325 ) -> Result<bool, DirstateV2ParseError> {
315 326 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
316 327 {
317 328 dirstate_nodes
318 329 .par_iter()
319 330 .map(|dirstate_node| {
320 331 let fs_path = directory_fs_path.join(get_path_from_bytes(
321 332 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
322 333 ));
323 334 match std::fs::symlink_metadata(&fs_path) {
324 335 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
325 336 &fs_path,
326 337 &fs_metadata,
327 338 dirstate_node,
328 339 has_ignored_ancestor,
329 340 ),
330 341 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
331 342 self.traverse_dirstate_only(dirstate_node)
332 343 }
333 344 Err(error) => {
334 345 let hg_path =
335 346 dirstate_node.full_path(self.dmap.on_disk)?;
336 347 Ok(self.io_error(error, hg_path))
337 348 }
338 349 }
339 350 })
340 351 .collect::<Result<_, _>>()?;
341 352
342 353 // We don’t know, so conservatively say this isn’t the case
343 354 let children_all_have_dirstate_node_or_are_ignored = false;
344 355
345 356 return Ok(children_all_have_dirstate_node_or_are_ignored);
346 357 }
347 358
348 359 let mut fs_entries = if let Ok(entries) = self.read_dir(
349 360 directory_hg_path,
350 361 directory_fs_path,
351 362 is_at_repo_root,
352 363 ) {
353 364 entries
354 365 } else {
355 366 // Treat an unreadable directory (typically because of insufficient
356 367 // permissions) like an empty directory. `self.read_dir` has
357 368 // already called `self.io_error` so a warning will be emitted.
358 369 Vec::new()
359 370 };
360 371
361 372 // `merge_join_by` requires both its input iterators to be sorted:
362 373
363 374 let dirstate_nodes = dirstate_nodes.sorted();
364 375 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
365 376 // https://github.com/rust-lang/rust/issues/34162
366 377 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
367 378
368 379 // Propagate here any error that would happen inside the comparison
369 380 // callback below
370 381 for dirstate_node in &dirstate_nodes {
371 382 dirstate_node.base_name(self.dmap.on_disk)?;
372 383 }
373 384 itertools::merge_join_by(
374 385 dirstate_nodes,
375 386 &fs_entries,
376 387 |dirstate_node, fs_entry| {
377 388 // This `unwrap` never panics because we already propagated
378 389 // those errors above
379 390 dirstate_node
380 391 .base_name(self.dmap.on_disk)
381 392 .unwrap()
382 393 .cmp(&fs_entry.base_name)
383 394 },
384 395 )
385 396 .par_bridge()
386 397 .map(|pair| {
387 398 use itertools::EitherOrBoth::*;
388 399 let has_dirstate_node_or_is_ignored;
389 400 match pair {
390 401 Both(dirstate_node, fs_entry) => {
391 402 self.traverse_fs_and_dirstate(
392 403 &fs_entry.full_path,
393 404 &fs_entry.metadata,
394 405 dirstate_node,
395 406 has_ignored_ancestor,
396 407 )?;
397 408 has_dirstate_node_or_is_ignored = true
398 409 }
399 410 Left(dirstate_node) => {
400 411 self.traverse_dirstate_only(dirstate_node)?;
401 412 has_dirstate_node_or_is_ignored = true;
402 413 }
403 414 Right(fs_entry) => {
404 415 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
405 416 has_ignored_ancestor,
406 417 directory_hg_path,
407 418 fs_entry,
408 419 )
409 420 }
410 421 }
411 422 Ok(has_dirstate_node_or_is_ignored)
412 423 })
413 424 .try_reduce(|| true, |a, b| Ok(a && b))
414 425 }
415 426
416 427 fn traverse_fs_and_dirstate(
417 428 &self,
418 429 fs_path: &Path,
419 430 fs_metadata: &std::fs::Metadata,
420 431 dirstate_node: NodeRef<'tree, 'on_disk>,
421 432 has_ignored_ancestor: bool,
422 433 ) -> Result<(), DirstateV2ParseError> {
423 434 self.check_for_outdated_directory_cache(&dirstate_node)?;
424 435 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
425 436 let file_type = fs_metadata.file_type();
426 437 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
427 438 if !file_or_symlink {
428 439 // If we previously had a file here, it was removed (with
429 440 // `hg rm` or similar) or deleted before it could be
430 441 // replaced by a directory or something else.
431 442 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
432 443 }
433 444 if file_type.is_dir() {
434 445 if self.options.collect_traversed_dirs {
435 446 self.outcome
436 447 .lock()
437 448 .unwrap()
438 449 .traversed
439 450 .push(hg_path.detach_from_tree())
440 451 }
441 452 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
442 453 let is_at_repo_root = false;
443 454 let children_all_have_dirstate_node_or_are_ignored = self
444 455 .traverse_fs_directory_and_dirstate(
445 456 is_ignored,
446 457 dirstate_node.children(self.dmap.on_disk)?,
447 458 hg_path,
448 459 fs_path,
449 460 Some(fs_metadata),
450 461 dirstate_node.cached_directory_mtime()?,
451 462 is_at_repo_root,
452 463 )?;
453 464 self.maybe_save_directory_mtime(
454 465 children_all_have_dirstate_node_or_are_ignored,
455 466 fs_metadata,
456 467 dirstate_node,
457 468 )?
458 469 } else {
459 470 if file_or_symlink && self.matcher.matches(hg_path) {
460 471 if let Some(state) = dirstate_node.state()? {
461 472 match state {
462 473 EntryState::Added => {
463 474 self.push_outcome(Outcome::Added, &dirstate_node)?
464 475 }
465 476 EntryState::Removed => self
466 477 .push_outcome(Outcome::Removed, &dirstate_node)?,
467 478 EntryState::Merged => self
468 479 .push_outcome(Outcome::Modified, &dirstate_node)?,
469 480 EntryState::Normal => self
470 481 .handle_normal_file(&dirstate_node, fs_metadata)?,
471 482 }
472 483 } else {
473 484 // `node.entry.is_none()` indicates a "directory"
474 485 // node, but the filesystem has a file
475 486 self.mark_unknown_or_ignored(
476 487 has_ignored_ancestor,
477 488 hg_path,
478 489 );
479 490 }
480 491 }
481 492
482 493 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
483 494 {
484 495 self.traverse_dirstate_only(child_node)?
485 496 }
486 497 }
487 498 Ok(())
488 499 }
489 500
490 501 fn maybe_save_directory_mtime(
491 502 &self,
492 503 children_all_have_dirstate_node_or_are_ignored: bool,
493 504 directory_metadata: &std::fs::Metadata,
494 505 dirstate_node: NodeRef<'tree, 'on_disk>,
495 506 ) -> Result<(), DirstateV2ParseError> {
496 507 if !children_all_have_dirstate_node_or_are_ignored {
497 508 return Ok(());
498 509 }
499 510 // All filesystem directory entries from `read_dir` have a
500 511 // corresponding node in the dirstate, so we can reconstitute the
501 512 // names of those entries without calling `read_dir` again.
502 513
503 514 // TODO: use let-else here and below when available:
504 515 // https://github.com/rust-lang/rust/issues/87335
505 516 let status_start = if let Some(status_start) =
506 517 &self.filesystem_time_at_status_start
507 518 {
508 519 status_start
509 520 } else {
510 521 return Ok(());
511 522 };
512 523
513 524 // Although the Rust standard library’s `SystemTime` type
514 525 // has nanosecond precision, the times reported for a
515 526 // directory’s (or file’s) modified time may have lower
516 527 // resolution based on the filesystem (for example ext3
517 528 // only stores integer seconds), kernel (see
518 529 // https://stackoverflow.com/a/14393315/1162888), etc.
519 530 let directory_mtime = if let Ok(option) =
520 531 TruncatedTimestamp::for_reliable_mtime_of(
521 532 directory_metadata,
522 533 status_start,
523 534 ) {
524 535 if let Some(directory_mtime) = option {
525 536 directory_mtime
526 537 } else {
527 538 // The directory was modified too recently,
528 539 // don’t cache its `read_dir` results.
529 540 //
530 541 // 1. A change to this directory (direct child was
531 542 // added or removed) cause its mtime to be set
532 543 // (possibly truncated) to `directory_mtime`
533 544 // 2. This `status` algorithm calls `read_dir`
534 545 // 3. An other change is made to the same directory is
535 546 // made so that calling `read_dir` agin would give
536 547 // different results, but soon enough after 1. that
537 548 // the mtime stays the same
538 549 //
539 550 // On a system where the time resolution poor, this
540 551 // scenario is not unlikely if all three steps are caused
541 552 // by the same script.
542 553 return Ok(());
543 554 }
544 555 } else {
545 556 // OS/libc does not support mtime?
546 557 return Ok(());
547 558 };
548 559 // We’ve observed (through `status_start`) that time has
549 560 // “progressed” since `directory_mtime`, so any further
550 561 // change to this directory is extremely likely to cause a
551 562 // different mtime.
552 563 //
553 564 // Having the same mtime again is not entirely impossible
554 565 // since the system clock is not monotonous. It could jump
555 566 // backward to some point before `directory_mtime`, then a
556 567 // directory change could potentially happen during exactly
557 568 // the wrong tick.
558 569 //
559 570 // We deem this scenario (unlike the previous one) to be
560 571 // unlikely enough in practice.
561 572
562 573 let is_up_to_date =
563 574 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
564 575 cached.likely_equal(directory_mtime)
565 576 } else {
566 577 false
567 578 };
568 579 if !is_up_to_date {
569 580 let hg_path = dirstate_node
570 581 .full_path_borrowed(self.dmap.on_disk)?
571 582 .detach_from_tree();
572 583 self.new_cachable_directories
573 584 .lock()
574 585 .unwrap()
575 586 .push((hg_path, directory_mtime))
576 587 }
577 588 Ok(())
578 589 }
579 590
580 591 /// A file with `EntryState::Normal` in the dirstate was found in the
581 592 /// filesystem
582 593 fn handle_normal_file(
583 594 &self,
584 595 dirstate_node: &NodeRef<'tree, 'on_disk>,
585 596 fs_metadata: &std::fs::Metadata,
586 597 ) -> Result<(), DirstateV2ParseError> {
587 598 // Keep the low 31 bits
588 599 fn truncate_u64(value: u64) -> i32 {
589 600 (value & 0x7FFF_FFFF) as i32
590 601 }
591 602
592 603 let entry = dirstate_node
593 604 .entry()?
594 605 .expect("handle_normal_file called with entry-less node");
595 606 let mode_changed =
596 607 || self.options.check_exec && entry.mode_changed(fs_metadata);
597 608 let size = entry.size();
598 609 let size_changed = size != truncate_u64(fs_metadata.len());
599 610 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
600 611 // issue6456: Size returned may be longer due to encryption
601 612 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
602 613 self.push_outcome(Outcome::Unsure, dirstate_node)?
603 614 } else if dirstate_node.has_copy_source()
604 615 || entry.is_from_other_parent()
605 616 || (size >= 0 && (size_changed || mode_changed()))
606 617 {
607 618 self.push_outcome(Outcome::Modified, dirstate_node)?
608 619 } else {
609 620 let mtime_looks_clean;
610 621 if let Some(dirstate_mtime) = entry.truncated_mtime() {
611 622 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
612 623 .expect("OS/libc does not support mtime?");
613 624 // There might be a change in the future if for example the
614 625 // internal clock become off while process run, but this is a
615 626 // case where the issues the user would face
616 627 // would be a lot worse and there is nothing we
617 628 // can really do.
618 629 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
619 630 } else {
620 631 // No mtime in the dirstate entry
621 632 mtime_looks_clean = false
622 633 };
623 634 if !mtime_looks_clean {
624 635 self.push_outcome(Outcome::Unsure, dirstate_node)?
625 636 } else if self.options.list_clean {
626 637 self.push_outcome(Outcome::Clean, dirstate_node)?
627 638 }
628 639 }
629 640 Ok(())
630 641 }
631 642
632 643 /// A node in the dirstate tree has no corresponding filesystem entry
633 644 fn traverse_dirstate_only(
634 645 &self,
635 646 dirstate_node: NodeRef<'tree, 'on_disk>,
636 647 ) -> Result<(), DirstateV2ParseError> {
637 648 self.check_for_outdated_directory_cache(&dirstate_node)?;
638 649 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
639 650 dirstate_node
640 651 .children(self.dmap.on_disk)?
641 652 .par_iter()
642 653 .map(|child_node| self.traverse_dirstate_only(child_node))
643 654 .collect()
644 655 }
645 656
646 657 /// A node in the dirstate tree has no corresponding *file* on the
647 658 /// filesystem
648 659 ///
649 660 /// Does nothing on a "directory" node
650 661 fn mark_removed_or_deleted_if_file(
651 662 &self,
652 663 dirstate_node: &NodeRef<'tree, 'on_disk>,
653 664 ) -> Result<(), DirstateV2ParseError> {
654 665 if let Some(state) = dirstate_node.state()? {
655 666 let path = dirstate_node.full_path(self.dmap.on_disk)?;
656 667 if self.matcher.matches(path) {
657 668 if let EntryState::Removed = state {
658 669 self.push_outcome(Outcome::Removed, dirstate_node)?
659 670 } else {
660 671 self.push_outcome(Outcome::Deleted, &dirstate_node)?
661 672 }
662 673 }
663 674 }
664 675 Ok(())
665 676 }
666 677
667 678 /// Something in the filesystem has no corresponding dirstate node
668 679 ///
669 680 /// Returns whether that path is ignored
670 681 fn traverse_fs_only(
671 682 &self,
672 683 has_ignored_ancestor: bool,
673 684 directory_hg_path: &HgPath,
674 685 fs_entry: &DirEntry,
675 686 ) -> bool {
676 687 let hg_path = directory_hg_path.join(&fs_entry.base_name);
677 688 let file_type = fs_entry.metadata.file_type();
678 689 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
679 690 if file_type.is_dir() {
680 691 let is_ignored =
681 692 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
682 693 let traverse_children = if is_ignored {
683 694 // Descendants of an ignored directory are all ignored
684 695 self.options.list_ignored
685 696 } else {
686 697 // Descendants of an unknown directory may be either unknown or
687 698 // ignored
688 699 self.options.list_unknown || self.options.list_ignored
689 700 };
690 701 if traverse_children {
691 702 let is_at_repo_root = false;
692 703 if let Ok(children_fs_entries) = self.read_dir(
693 704 &hg_path,
694 705 &fs_entry.full_path,
695 706 is_at_repo_root,
696 707 ) {
697 708 children_fs_entries.par_iter().for_each(|child_fs_entry| {
698 709 self.traverse_fs_only(
699 710 is_ignored,
700 711 &hg_path,
701 712 child_fs_entry,
702 713 );
703 714 })
704 715 }
705 716 }
706 717 if self.options.collect_traversed_dirs {
707 718 self.outcome.lock().unwrap().traversed.push(hg_path.into())
708 719 }
709 720 is_ignored
710 721 } else {
711 722 if file_or_symlink {
712 723 if self.matcher.matches(&hg_path) {
713 724 self.mark_unknown_or_ignored(
714 725 has_ignored_ancestor,
715 726 &BorrowedPath::InMemory(&hg_path),
716 727 )
717 728 } else {
718 729 // We haven’t computed whether this path is ignored. It
719 730 // might not be, and a future run of status might have a
720 731 // different matcher that matches it. So treat it as not
721 732 // ignored. That is, inhibit readdir caching of the parent
722 733 // directory.
723 734 false
724 735 }
725 736 } else {
726 737 // This is neither a directory, a plain file, or a symlink.
727 738 // Treat it like an ignored file.
728 739 true
729 740 }
730 741 }
731 742 }
732 743
733 744 /// Returns whether that path is ignored
734 745 fn mark_unknown_or_ignored(
735 746 &self,
736 747 has_ignored_ancestor: bool,
737 748 hg_path: &BorrowedPath<'_, 'on_disk>,
738 749 ) -> bool {
739 750 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
740 751 if is_ignored {
741 752 if self.options.list_ignored {
742 753 self.push_outcome_without_copy_source(
743 754 Outcome::Ignored,
744 755 hg_path,
745 756 )
746 757 }
747 758 } else {
748 759 if self.options.list_unknown {
749 760 self.push_outcome_without_copy_source(
750 761 Outcome::Unknown,
751 762 hg_path,
752 763 )
753 764 }
754 765 }
755 766 is_ignored
756 767 }
757 768 }
758 769
759 770 struct DirEntry {
760 771 base_name: HgPathBuf,
761 772 full_path: PathBuf,
762 773 metadata: std::fs::Metadata,
763 774 }
764 775
765 776 impl DirEntry {
766 777 /// Returns **unsorted** entries in the given directory, with name and
767 778 /// metadata.
768 779 ///
769 780 /// If a `.hg` sub-directory is encountered:
770 781 ///
771 782 /// * At the repository root, ignore that sub-directory
772 783 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
773 784 /// list instead.
774 785 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
775 786 // `read_dir` returns a "not found" error for the empty path
776 787 let at_cwd = path == Path::new("");
777 788 let read_dir_path = if at_cwd { Path::new(".") } else { path };
778 789 let mut results = Vec::new();
779 790 for entry in read_dir_path.read_dir()? {
780 791 let entry = entry?;
781 792 let metadata = match entry.metadata() {
782 793 Ok(v) => v,
783 794 Err(e) => {
784 795 // race with file deletion?
785 796 if e.kind() == std::io::ErrorKind::NotFound {
786 797 continue;
787 798 } else {
788 799 return Err(e);
789 800 }
790 801 }
791 802 };
792 803 let file_name = entry.file_name();
793 804 // FIXME don't do this when cached
794 805 if file_name == ".hg" {
795 806 if is_at_repo_root {
796 807 // Skip the repo’s own .hg (might be a symlink)
797 808 continue;
798 809 } else if metadata.is_dir() {
799 810 // A .hg sub-directory at another location means a subrepo,
800 811 // skip it entirely.
801 812 return Ok(Vec::new());
802 813 }
803 814 }
804 815 let full_path = if at_cwd {
805 816 file_name.clone().into()
806 817 } else {
807 818 entry.path()
808 819 };
809 820 let base_name = get_bytes_from_os_string(file_name).into();
810 821 results.push(DirEntry {
811 822 base_name,
812 823 full_path,
813 824 metadata,
814 825 })
815 826 }
816 827 Ok(results)
817 828 }
818 829 }
819 830
820 831 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
821 832 /// of the give repository.
822 833 ///
823 834 /// This is similar to `SystemTime::now()`, with the result truncated to the
824 835 /// same time resolution as other files’ modification times. Using `.hg`
825 836 /// instead of the system’s default temporary directory (such as `/tmp`) makes
826 837 /// it more likely the temporary file is in the same disk partition as contents
827 838 /// of the working directory, which can matter since different filesystems may
828 839 /// store timestamps with different resolutions.
829 840 ///
830 841 /// This may fail, typically if we lack write permissions. In that case we
831 842 /// should continue the `status()` algoritm anyway and consider the current
832 843 /// date/time to be unknown.
833 844 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
834 845 tempfile::tempfile_in(repo_root.join(".hg"))?
835 846 .metadata()?
836 847 .modified()
837 848 }
General Comments 0
You need to be logged in to leave comments. Login now