##// END OF EJS Templates
rust-status: ignored directories are now correctly only listed if opted into...
Raphaël Gomès -
r50316:7e5377bd stable
parent child Browse files
Show More
@@ -1,863 +1,863 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::Matcher;
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_path_from_bytes;
14 14 use crate::utils::hg_path::HgPath;
15 15 use crate::BadMatch;
16 16 use crate::DirstateStatus;
17 17 use crate::HgPathBuf;
18 18 use crate::HgPathCow;
19 19 use crate::PatternFileWarning;
20 20 use crate::StatusError;
21 21 use crate::StatusOptions;
22 22 use micro_timer::timed;
23 23 use rayon::prelude::*;
24 24 use sha1::{Digest, Sha1};
25 25 use std::borrow::Cow;
26 26 use std::io;
27 27 use std::path::Path;
28 28 use std::path::PathBuf;
29 29 use std::sync::Mutex;
30 30 use std::time::SystemTime;
31 31
32 32 /// Returns the status of the working directory compared to its parent
33 33 /// changeset.
34 34 ///
35 35 /// This algorithm is based on traversing the filesystem tree (`fs` in function
36 36 /// and variable names) and dirstate tree at the same time. The core of this
37 37 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
38 38 /// and its use of `itertools::merge_join_by`. When reaching a path that only
39 39 /// exists in one of the two trees, depending on information requested by
40 40 /// `options` we may need to traverse the remaining subtree.
41 41 #[timed]
42 42 pub fn status<'dirstate>(
43 43 dmap: &'dirstate mut DirstateMap,
44 44 matcher: &(dyn Matcher + Sync),
45 45 root_dir: PathBuf,
46 46 ignore_files: Vec<PathBuf>,
47 47 options: StatusOptions,
48 48 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
49 49 {
50 50 // Force the global rayon threadpool to not exceed 16 concurrent threads.
51 51 // This is a stop-gap measure until we figure out why using more than 16
52 52 // threads makes `status` slower for each additional thread.
53 53 // We use `ok()` in case the global threadpool has already been
54 54 // instantiated in `rhg` or some other caller.
55 55 // TODO find the underlying cause and fix it, then remove this.
56 56 rayon::ThreadPoolBuilder::new()
57 57 .num_threads(16)
58 58 .build_global()
59 59 .ok();
60 60
61 61 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
62 62 if options.list_ignored || options.list_unknown {
63 63 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
64 64 DirstateVersion::V1 => {
65 65 let (ignore_fn, warnings) = get_ignore_function(
66 66 ignore_files,
67 67 &root_dir,
68 68 &mut |_pattern_bytes| {},
69 69 )?;
70 70 (ignore_fn, warnings, None)
71 71 }
72 72 DirstateVersion::V2 => {
73 73 let mut hasher = Sha1::new();
74 74 let (ignore_fn, warnings) = get_ignore_function(
75 75 ignore_files,
76 76 &root_dir,
77 77 &mut |pattern_bytes| hasher.update(pattern_bytes),
78 78 )?;
79 79 let new_hash = *hasher.finalize().as_ref();
80 80 let changed = new_hash != dmap.ignore_patterns_hash;
81 81 dmap.ignore_patterns_hash = new_hash;
82 82 (ignore_fn, warnings, Some(changed))
83 83 }
84 84 };
85 85 (ignore_fn, warnings, changed)
86 86 } else {
87 87 (Box::new(|&_| true), vec![], None)
88 88 };
89 89
90 90 let filesystem_time_at_status_start =
91 91 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
92 92
93 93 // If the repository is under the current directory, prefer using a
94 94 // relative path, so the kernel needs to traverse fewer directory in every
95 95 // call to `read_dir` or `symlink_metadata`.
96 96 // This is effective in the common case where the current directory is the
97 97 // repository root.
98 98
99 99 // TODO: Better yet would be to use libc functions like `openat` and
100 100 // `fstatat` to remove such repeated traversals entirely, but the standard
101 101 // library does not provide APIs based on those.
102 102 // Maybe with a crate like https://crates.io/crates/openat instead?
103 103 let root_dir = if let Some(relative) = std::env::current_dir()
104 104 .ok()
105 105 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
106 106 {
107 107 relative
108 108 } else {
109 109 &root_dir
110 110 };
111 111
112 112 let outcome = DirstateStatus {
113 113 filesystem_time_at_status_start,
114 114 ..Default::default()
115 115 };
116 116 let common = StatusCommon {
117 117 dmap,
118 118 options,
119 119 matcher,
120 120 ignore_fn,
121 121 outcome: Mutex::new(outcome),
122 122 ignore_patterns_have_changed: patterns_changed,
123 123 new_cachable_directories: Default::default(),
124 124 outated_cached_directories: Default::default(),
125 125 filesystem_time_at_status_start,
126 126 };
127 127 let is_at_repo_root = true;
128 128 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
129 129 let has_ignored_ancestor = false;
130 130 let root_cached_mtime = None;
131 131 let root_dir_metadata = None;
132 132 // If the path we have for the repository root is a symlink, do follow it.
133 133 // (As opposed to symlinks within the working directory which are not
134 134 // followed, using `std::fs::symlink_metadata`.)
135 135 common.traverse_fs_directory_and_dirstate(
136 136 has_ignored_ancestor,
137 137 dmap.root.as_ref(),
138 138 hg_path,
139 139 &root_dir,
140 140 root_dir_metadata,
141 141 root_cached_mtime,
142 142 is_at_repo_root,
143 143 )?;
144 144 let mut outcome = common.outcome.into_inner().unwrap();
145 145 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
146 146 let outdated = common.outated_cached_directories.into_inner().unwrap();
147 147
148 148 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
149 149 || !outdated.is_empty()
150 150 || (!new_cachable.is_empty()
151 151 && dmap.dirstate_version == DirstateVersion::V2);
152 152
153 153 // Remove outdated mtimes before adding new mtimes, in case a given
154 154 // directory is both
155 155 for path in &outdated {
156 156 dmap.clear_cached_mtime(path)?;
157 157 }
158 158 for (path, mtime) in &new_cachable {
159 159 dmap.set_cached_mtime(path, *mtime)?;
160 160 }
161 161
162 162 Ok((outcome, warnings))
163 163 }
164 164
165 165 /// Bag of random things needed by various parts of the algorithm. Reduces the
166 166 /// number of parameters passed to functions.
167 167 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
168 168 dmap: &'tree DirstateMap<'on_disk>,
169 169 options: StatusOptions,
170 170 matcher: &'a (dyn Matcher + Sync),
171 171 ignore_fn: IgnoreFnType<'a>,
172 172 outcome: Mutex<DirstateStatus<'on_disk>>,
173 173 new_cachable_directories:
174 174 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
175 175 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
176 176
177 177 /// Whether ignore files like `.hgignore` have changed since the previous
178 178 /// time a `status()` call wrote their hash to the dirstate. `None` means
179 179 /// we don’t know as this run doesn’t list either ignored or uknown files
180 180 /// and therefore isn’t reading `.hgignore`.
181 181 ignore_patterns_have_changed: Option<bool>,
182 182
183 183 /// The current time at the start of the `status()` algorithm, as measured
184 184 /// and possibly truncated by the filesystem.
185 185 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
186 186 }
187 187
/// Selects which list of the `DirstateStatus` result a path is pushed to.
enum Outcome {
    /// Goes to the `modified` list.
    Modified,
    /// Goes to the `added` list.
    Added,
    /// Goes to the `removed` list.
    Removed,
    /// Goes to the `deleted` list.
    Deleted,
    /// Goes to the `clean` list.
    Clean,
    /// Goes to the `ignored` list.
    Ignored,
    /// Goes to the `unknown` list.
    Unknown,
    /// Goes to the `unsure` list.
    Unsure,
}
198 198
199 199 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
200 200 fn push_outcome(
201 201 &self,
202 202 which: Outcome,
203 203 dirstate_node: &NodeRef<'tree, 'on_disk>,
204 204 ) -> Result<(), DirstateV2ParseError> {
205 205 let path = dirstate_node
206 206 .full_path_borrowed(self.dmap.on_disk)?
207 207 .detach_from_tree();
208 208 let copy_source = if self.options.list_copies {
209 209 dirstate_node
210 210 .copy_source_borrowed(self.dmap.on_disk)?
211 211 .map(|source| source.detach_from_tree())
212 212 } else {
213 213 None
214 214 };
215 215 self.push_outcome_common(which, path, copy_source);
216 216 Ok(())
217 217 }
218 218
219 219 fn push_outcome_without_copy_source(
220 220 &self,
221 221 which: Outcome,
222 222 path: &BorrowedPath<'_, 'on_disk>,
223 223 ) {
224 224 self.push_outcome_common(which, path.detach_from_tree(), None)
225 225 }
226 226
227 227 fn push_outcome_common(
228 228 &self,
229 229 which: Outcome,
230 230 path: HgPathCow<'on_disk>,
231 231 copy_source: Option<HgPathCow<'on_disk>>,
232 232 ) {
233 233 let mut outcome = self.outcome.lock().unwrap();
234 234 let vec = match which {
235 235 Outcome::Modified => &mut outcome.modified,
236 236 Outcome::Added => &mut outcome.added,
237 237 Outcome::Removed => &mut outcome.removed,
238 238 Outcome::Deleted => &mut outcome.deleted,
239 239 Outcome::Clean => &mut outcome.clean,
240 240 Outcome::Ignored => &mut outcome.ignored,
241 241 Outcome::Unknown => &mut outcome.unknown,
242 242 Outcome::Unsure => &mut outcome.unsure,
243 243 };
244 244 vec.push(StatusPath { path, copy_source });
245 245 }
246 246
247 247 fn read_dir(
248 248 &self,
249 249 hg_path: &HgPath,
250 250 fs_path: &Path,
251 251 is_at_repo_root: bool,
252 252 ) -> Result<Vec<DirEntry>, ()> {
253 253 DirEntry::read_dir(fs_path, is_at_repo_root)
254 254 .map_err(|error| self.io_error(error, hg_path))
255 255 }
256 256
257 257 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
258 258 let errno = error.raw_os_error().expect("expected real OS error");
259 259 self.outcome
260 260 .lock()
261 261 .unwrap()
262 262 .bad
263 263 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
264 264 }
265 265
266 266 fn check_for_outdated_directory_cache(
267 267 &self,
268 268 dirstate_node: &NodeRef<'tree, 'on_disk>,
269 269 ) -> Result<(), DirstateV2ParseError> {
270 270 if self.ignore_patterns_have_changed == Some(true)
271 271 && dirstate_node.cached_directory_mtime()?.is_some()
272 272 {
273 273 self.outated_cached_directories.lock().unwrap().push(
274 274 dirstate_node
275 275 .full_path_borrowed(self.dmap.on_disk)?
276 276 .detach_from_tree(),
277 277 )
278 278 }
279 279 Ok(())
280 280 }
281 281
282 282 /// If this returns true, we can get accurate results by only using
283 283 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
284 284 /// need to call `read_dir`.
285 285 fn can_skip_fs_readdir(
286 286 &self,
287 287 directory_metadata: Option<&std::fs::Metadata>,
288 288 cached_directory_mtime: Option<TruncatedTimestamp>,
289 289 ) -> bool {
290 290 if !self.options.list_unknown && !self.options.list_ignored {
291 291 // All states that we care about listing have corresponding
292 292 // dirstate entries.
293 293 // This happens for example with `hg status -mard`.
294 294 return true;
295 295 }
296 296 if !self.options.list_ignored
297 297 && self.ignore_patterns_have_changed == Some(false)
298 298 {
299 299 if let Some(cached_mtime) = cached_directory_mtime {
300 300 // The dirstate contains a cached mtime for this directory, set
301 301 // by a previous run of the `status` algorithm which found this
302 302 // directory eligible for `read_dir` caching.
303 303 if let Some(meta) = directory_metadata {
304 304 if cached_mtime
305 305 .likely_equal_to_mtime_of(meta)
306 306 .unwrap_or(false)
307 307 {
308 308 // The mtime of that directory has not changed
309 309 // since then, which means that the results of
310 310 // `read_dir` should also be unchanged.
311 311 return true;
312 312 }
313 313 }
314 314 }
315 315 }
316 316 false
317 317 }
318 318
319 319 /// Returns whether all child entries of the filesystem directory have a
320 320 /// corresponding dirstate node or are ignored.
321 321 fn traverse_fs_directory_and_dirstate(
322 322 &self,
323 323 has_ignored_ancestor: bool,
324 324 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
325 325 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
326 326 directory_fs_path: &Path,
327 327 directory_metadata: Option<&std::fs::Metadata>,
328 328 cached_directory_mtime: Option<TruncatedTimestamp>,
329 329 is_at_repo_root: bool,
330 330 ) -> Result<bool, DirstateV2ParseError> {
331 331 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
332 332 {
333 333 dirstate_nodes
334 334 .par_iter()
335 335 .map(|dirstate_node| {
336 336 let fs_path = directory_fs_path.join(get_path_from_bytes(
337 337 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
338 338 ));
339 339 match std::fs::symlink_metadata(&fs_path) {
340 340 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
341 341 &fs_path,
342 342 &fs_metadata,
343 343 dirstate_node,
344 344 has_ignored_ancestor,
345 345 ),
346 346 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
347 347 self.traverse_dirstate_only(dirstate_node)
348 348 }
349 349 Err(error) => {
350 350 let hg_path =
351 351 dirstate_node.full_path(self.dmap.on_disk)?;
352 352 Ok(self.io_error(error, hg_path))
353 353 }
354 354 }
355 355 })
356 356 .collect::<Result<_, _>>()?;
357 357
358 358 // We don’t know, so conservatively say this isn’t the case
359 359 let children_all_have_dirstate_node_or_are_ignored = false;
360 360
361 361 return Ok(children_all_have_dirstate_node_or_are_ignored);
362 362 }
363 363
364 364 let mut fs_entries = if let Ok(entries) = self.read_dir(
365 365 directory_hg_path,
366 366 directory_fs_path,
367 367 is_at_repo_root,
368 368 ) {
369 369 entries
370 370 } else {
371 371 // Treat an unreadable directory (typically because of insufficient
372 372 // permissions) like an empty directory. `self.read_dir` has
373 373 // already called `self.io_error` so a warning will be emitted.
374 374 Vec::new()
375 375 };
376 376
377 377 // `merge_join_by` requires both its input iterators to be sorted:
378 378
379 379 let dirstate_nodes = dirstate_nodes.sorted();
380 380 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
381 381 // https://github.com/rust-lang/rust/issues/34162
382 382 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
383 383
384 384 // Propagate here any error that would happen inside the comparison
385 385 // callback below
386 386 for dirstate_node in &dirstate_nodes {
387 387 dirstate_node.base_name(self.dmap.on_disk)?;
388 388 }
389 389 itertools::merge_join_by(
390 390 dirstate_nodes,
391 391 &fs_entries,
392 392 |dirstate_node, fs_entry| {
393 393 // This `unwrap` never panics because we already propagated
394 394 // those errors above
395 395 dirstate_node
396 396 .base_name(self.dmap.on_disk)
397 397 .unwrap()
398 398 .cmp(&fs_entry.base_name)
399 399 },
400 400 )
401 401 .par_bridge()
402 402 .map(|pair| {
403 403 use itertools::EitherOrBoth::*;
404 404 let has_dirstate_node_or_is_ignored;
405 405 match pair {
406 406 Both(dirstate_node, fs_entry) => {
407 407 self.traverse_fs_and_dirstate(
408 408 &fs_entry.full_path,
409 409 &fs_entry.metadata,
410 410 dirstate_node,
411 411 has_ignored_ancestor,
412 412 )?;
413 413 has_dirstate_node_or_is_ignored = true
414 414 }
415 415 Left(dirstate_node) => {
416 416 self.traverse_dirstate_only(dirstate_node)?;
417 417 has_dirstate_node_or_is_ignored = true;
418 418 }
419 419 Right(fs_entry) => {
420 420 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
421 421 has_ignored_ancestor,
422 422 directory_hg_path,
423 423 fs_entry,
424 424 )
425 425 }
426 426 }
427 427 Ok(has_dirstate_node_or_is_ignored)
428 428 })
429 429 .try_reduce(|| true, |a, b| Ok(a && b))
430 430 }
431 431
432 432 fn traverse_fs_and_dirstate(
433 433 &self,
434 434 fs_path: &Path,
435 435 fs_metadata: &std::fs::Metadata,
436 436 dirstate_node: NodeRef<'tree, 'on_disk>,
437 437 has_ignored_ancestor: bool,
438 438 ) -> Result<(), DirstateV2ParseError> {
439 439 self.check_for_outdated_directory_cache(&dirstate_node)?;
440 440 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
441 441 let file_type = fs_metadata.file_type();
442 442 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
443 443 if !file_or_symlink {
444 444 // If we previously had a file here, it was removed (with
445 445 // `hg rm` or similar) or deleted before it could be
446 446 // replaced by a directory or something else.
447 447 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
448 448 }
449 449 if file_type.is_dir() {
450 450 if self.options.collect_traversed_dirs {
451 451 self.outcome
452 452 .lock()
453 453 .unwrap()
454 454 .traversed
455 455 .push(hg_path.detach_from_tree())
456 456 }
457 457 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
458 458 let is_at_repo_root = false;
459 459 let children_all_have_dirstate_node_or_are_ignored = self
460 460 .traverse_fs_directory_and_dirstate(
461 461 is_ignored,
462 462 dirstate_node.children(self.dmap.on_disk)?,
463 463 hg_path,
464 464 fs_path,
465 465 Some(fs_metadata),
466 466 dirstate_node.cached_directory_mtime()?,
467 467 is_at_repo_root,
468 468 )?;
469 469 self.maybe_save_directory_mtime(
470 470 children_all_have_dirstate_node_or_are_ignored,
471 471 fs_metadata,
472 472 dirstate_node,
473 473 )?
474 474 } else {
475 475 if file_or_symlink && self.matcher.matches(hg_path) {
476 476 if let Some(entry) = dirstate_node.entry()? {
477 477 if !entry.any_tracked() {
478 478 // Forward-compat if we start tracking unknown/ignored
479 479 // files for caching reasons
480 480 self.mark_unknown_or_ignored(
481 481 has_ignored_ancestor,
482 482 hg_path,
483 483 );
484 484 }
485 485 if entry.added() {
486 486 self.push_outcome(Outcome::Added, &dirstate_node)?;
487 487 } else if entry.removed() {
488 488 self.push_outcome(Outcome::Removed, &dirstate_node)?;
489 489 } else if entry.modified() {
490 490 self.push_outcome(Outcome::Modified, &dirstate_node)?;
491 491 } else {
492 492 self.handle_normal_file(&dirstate_node, fs_metadata)?;
493 493 }
494 494 } else {
495 495 // `node.entry.is_none()` indicates a "directory"
496 496 // node, but the filesystem has a file
497 497 self.mark_unknown_or_ignored(
498 498 has_ignored_ancestor,
499 499 hg_path,
500 500 );
501 501 }
502 502 }
503 503
504 504 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
505 505 {
506 506 self.traverse_dirstate_only(child_node)?
507 507 }
508 508 }
509 509 Ok(())
510 510 }
511 511
512 512 fn maybe_save_directory_mtime(
513 513 &self,
514 514 children_all_have_dirstate_node_or_are_ignored: bool,
515 515 directory_metadata: &std::fs::Metadata,
516 516 dirstate_node: NodeRef<'tree, 'on_disk>,
517 517 ) -> Result<(), DirstateV2ParseError> {
518 518 if !children_all_have_dirstate_node_or_are_ignored {
519 519 return Ok(());
520 520 }
521 521 // All filesystem directory entries from `read_dir` have a
522 522 // corresponding node in the dirstate, so we can reconstitute the
523 523 // names of those entries without calling `read_dir` again.
524 524
525 525 // TODO: use let-else here and below when available:
526 526 // https://github.com/rust-lang/rust/issues/87335
527 527 let status_start = if let Some(status_start) =
528 528 &self.filesystem_time_at_status_start
529 529 {
530 530 status_start
531 531 } else {
532 532 return Ok(());
533 533 };
534 534
535 535 // Although the Rust standard library’s `SystemTime` type
536 536 // has nanosecond precision, the times reported for a
537 537 // directory’s (or file’s) modified time may have lower
538 538 // resolution based on the filesystem (for example ext3
539 539 // only stores integer seconds), kernel (see
540 540 // https://stackoverflow.com/a/14393315/1162888), etc.
541 541 let directory_mtime = if let Ok(option) =
542 542 TruncatedTimestamp::for_reliable_mtime_of(
543 543 directory_metadata,
544 544 status_start,
545 545 ) {
546 546 if let Some(directory_mtime) = option {
547 547 directory_mtime
548 548 } else {
549 549 // The directory was modified too recently,
550 550 // don’t cache its `read_dir` results.
551 551 //
552 552 // 1. A change to this directory (direct child was
553 553 // added or removed) cause its mtime to be set
554 554 // (possibly truncated) to `directory_mtime`
555 555 // 2. This `status` algorithm calls `read_dir`
556 556 // 3. An other change is made to the same directory is
557 557 // made so that calling `read_dir` agin would give
558 558 // different results, but soon enough after 1. that
559 559 // the mtime stays the same
560 560 //
561 561 // On a system where the time resolution poor, this
562 562 // scenario is not unlikely if all three steps are caused
563 563 // by the same script.
564 564 return Ok(());
565 565 }
566 566 } else {
567 567 // OS/libc does not support mtime?
568 568 return Ok(());
569 569 };
570 570 // We’ve observed (through `status_start`) that time has
571 571 // “progressed” since `directory_mtime`, so any further
572 572 // change to this directory is extremely likely to cause a
573 573 // different mtime.
574 574 //
575 575 // Having the same mtime again is not entirely impossible
576 576 // since the system clock is not monotonous. It could jump
577 577 // backward to some point before `directory_mtime`, then a
578 578 // directory change could potentially happen during exactly
579 579 // the wrong tick.
580 580 //
581 581 // We deem this scenario (unlike the previous one) to be
582 582 // unlikely enough in practice.
583 583
584 584 let is_up_to_date =
585 585 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
586 586 cached.likely_equal(directory_mtime)
587 587 } else {
588 588 false
589 589 };
590 590 if !is_up_to_date {
591 591 let hg_path = dirstate_node
592 592 .full_path_borrowed(self.dmap.on_disk)?
593 593 .detach_from_tree();
594 594 self.new_cachable_directories
595 595 .lock()
596 596 .unwrap()
597 597 .push((hg_path, directory_mtime))
598 598 }
599 599 Ok(())
600 600 }
601 601
602 602 /// A file that is clean in the dirstate was found in the filesystem
603 603 fn handle_normal_file(
604 604 &self,
605 605 dirstate_node: &NodeRef<'tree, 'on_disk>,
606 606 fs_metadata: &std::fs::Metadata,
607 607 ) -> Result<(), DirstateV2ParseError> {
608 608 // Keep the low 31 bits
609 609 fn truncate_u64(value: u64) -> i32 {
610 610 (value & 0x7FFF_FFFF) as i32
611 611 }
612 612
613 613 let entry = dirstate_node
614 614 .entry()?
615 615 .expect("handle_normal_file called with entry-less node");
616 616 let mode_changed =
617 617 || self.options.check_exec && entry.mode_changed(fs_metadata);
618 618 let size = entry.size();
619 619 let size_changed = size != truncate_u64(fs_metadata.len());
620 620 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
621 621 // issue6456: Size returned may be longer due to encryption
622 622 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
623 623 self.push_outcome(Outcome::Unsure, dirstate_node)?
624 624 } else if dirstate_node.has_copy_source()
625 625 || entry.is_from_other_parent()
626 626 || (size >= 0 && (size_changed || mode_changed()))
627 627 {
628 628 self.push_outcome(Outcome::Modified, dirstate_node)?
629 629 } else {
630 630 let mtime_looks_clean;
631 631 if let Some(dirstate_mtime) = entry.truncated_mtime() {
632 632 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
633 633 .expect("OS/libc does not support mtime?");
634 634 // There might be a change in the future if for example the
635 635 // internal clock become off while process run, but this is a
636 636 // case where the issues the user would face
637 637 // would be a lot worse and there is nothing we
638 638 // can really do.
639 639 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
640 640 } else {
641 641 // No mtime in the dirstate entry
642 642 mtime_looks_clean = false
643 643 };
644 644 if !mtime_looks_clean {
645 645 self.push_outcome(Outcome::Unsure, dirstate_node)?
646 646 } else if self.options.list_clean {
647 647 self.push_outcome(Outcome::Clean, dirstate_node)?
648 648 }
649 649 }
650 650 Ok(())
651 651 }
652 652
653 653 /// A node in the dirstate tree has no corresponding filesystem entry
654 654 fn traverse_dirstate_only(
655 655 &self,
656 656 dirstate_node: NodeRef<'tree, 'on_disk>,
657 657 ) -> Result<(), DirstateV2ParseError> {
658 658 self.check_for_outdated_directory_cache(&dirstate_node)?;
659 659 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
660 660 dirstate_node
661 661 .children(self.dmap.on_disk)?
662 662 .par_iter()
663 663 .map(|child_node| self.traverse_dirstate_only(child_node))
664 664 .collect()
665 665 }
666 666
667 667 /// A node in the dirstate tree has no corresponding *file* on the
668 668 /// filesystem
669 669 ///
670 670 /// Does nothing on a "directory" node
671 671 fn mark_removed_or_deleted_if_file(
672 672 &self,
673 673 dirstate_node: &NodeRef<'tree, 'on_disk>,
674 674 ) -> Result<(), DirstateV2ParseError> {
675 675 if let Some(entry) = dirstate_node.entry()? {
676 676 if !entry.any_tracked() {
677 677 // Future-compat for when we start storing ignored and unknown
678 678 // files for caching reasons
679 679 return Ok(());
680 680 }
681 681 let path = dirstate_node.full_path(self.dmap.on_disk)?;
682 682 if self.matcher.matches(path) {
683 683 if entry.removed() {
684 684 self.push_outcome(Outcome::Removed, dirstate_node)?
685 685 } else {
686 686 self.push_outcome(Outcome::Deleted, &dirstate_node)?
687 687 }
688 688 }
689 689 }
690 690 Ok(())
691 691 }
692 692
693 693 /// Something in the filesystem has no corresponding dirstate node
694 694 ///
695 695 /// Returns whether that path is ignored
696 696 fn traverse_fs_only(
697 697 &self,
698 698 has_ignored_ancestor: bool,
699 699 directory_hg_path: &HgPath,
700 700 fs_entry: &DirEntry,
701 701 ) -> bool {
702 702 let hg_path = directory_hg_path.join(&fs_entry.base_name);
703 703 let file_type = fs_entry.metadata.file_type();
704 704 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
705 705 if file_type.is_dir() {
706 706 let is_ignored =
707 707 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
708 708 let traverse_children = if is_ignored {
709 709 // Descendants of an ignored directory are all ignored
710 710 self.options.list_ignored
711 711 } else {
712 712 // Descendants of an unknown directory may be either unknown or
713 713 // ignored
714 714 self.options.list_unknown || self.options.list_ignored
715 715 };
716 716 if traverse_children {
717 717 let is_at_repo_root = false;
718 718 if let Ok(children_fs_entries) = self.read_dir(
719 719 &hg_path,
720 720 &fs_entry.full_path,
721 721 is_at_repo_root,
722 722 ) {
723 723 children_fs_entries.par_iter().for_each(|child_fs_entry| {
724 724 self.traverse_fs_only(
725 725 is_ignored,
726 726 &hg_path,
727 727 child_fs_entry,
728 728 );
729 729 })
730 730 }
731 }
732 if self.options.collect_traversed_dirs {
733 self.outcome.lock().unwrap().traversed.push(hg_path.into())
731 if self.options.collect_traversed_dirs {
732 self.outcome.lock().unwrap().traversed.push(hg_path.into())
733 }
734 734 }
735 735 is_ignored
736 736 } else {
737 737 if file_or_symlink {
738 738 if self.matcher.matches(&hg_path) {
739 739 self.mark_unknown_or_ignored(
740 740 has_ignored_ancestor,
741 741 &BorrowedPath::InMemory(&hg_path),
742 742 )
743 743 } else {
744 744 // We haven’t computed whether this path is ignored. It
745 745 // might not be, and a future run of status might have a
746 746 // different matcher that matches it. So treat it as not
747 747 // ignored. That is, inhibit readdir caching of the parent
748 748 // directory.
749 749 false
750 750 }
751 751 } else {
752 752 // This is neither a directory, a plain file, or a symlink.
753 753 // Treat it like an ignored file.
754 754 true
755 755 }
756 756 }
757 757 }
758 758
759 759 /// Returns whether that path is ignored
760 760 fn mark_unknown_or_ignored(
761 761 &self,
762 762 has_ignored_ancestor: bool,
763 763 hg_path: &BorrowedPath<'_, 'on_disk>,
764 764 ) -> bool {
765 765 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
766 766 if is_ignored {
767 767 if self.options.list_ignored {
768 768 self.push_outcome_without_copy_source(
769 769 Outcome::Ignored,
770 770 hg_path,
771 771 )
772 772 }
773 773 } else {
774 774 if self.options.list_unknown {
775 775 self.push_outcome_without_copy_source(
776 776 Outcome::Unknown,
777 777 hg_path,
778 778 )
779 779 }
780 780 }
781 781 is_ignored
782 782 }
783 783 }
784 784
785 785 struct DirEntry {
786 786 base_name: HgPathBuf,
787 787 full_path: PathBuf,
788 788 metadata: std::fs::Metadata,
789 789 }
790 790
791 791 impl DirEntry {
792 792 /// Returns **unsorted** entries in the given directory, with name and
793 793 /// metadata.
794 794 ///
795 795 /// If a `.hg` sub-directory is encountered:
796 796 ///
797 797 /// * At the repository root, ignore that sub-directory
798 798 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
799 799 /// list instead.
800 800 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
801 801 // `read_dir` returns a "not found" error for the empty path
802 802 let at_cwd = path == Path::new("");
803 803 let read_dir_path = if at_cwd { Path::new(".") } else { path };
804 804 let mut results = Vec::new();
805 805 for entry in read_dir_path.read_dir()? {
806 806 let entry = entry?;
807 807 let metadata = match entry.metadata() {
808 808 Ok(v) => v,
809 809 Err(e) => {
810 810 // race with file deletion?
811 811 if e.kind() == std::io::ErrorKind::NotFound {
812 812 continue;
813 813 } else {
814 814 return Err(e);
815 815 }
816 816 }
817 817 };
818 818 let file_name = entry.file_name();
819 819 // FIXME don't do this when cached
820 820 if file_name == ".hg" {
821 821 if is_at_repo_root {
822 822 // Skip the repo’s own .hg (might be a symlink)
823 823 continue;
824 824 } else if metadata.is_dir() {
825 825 // A .hg sub-directory at another location means a subrepo,
826 826 // skip it entirely.
827 827 return Ok(Vec::new());
828 828 }
829 829 }
830 830 let full_path = if at_cwd {
831 831 file_name.clone().into()
832 832 } else {
833 833 entry.path()
834 834 };
835 835 let base_name = get_bytes_from_os_string(file_name).into();
836 836 results.push(DirEntry {
837 837 base_name,
838 838 full_path,
839 839 metadata,
840 840 })
841 841 }
842 842 Ok(results)
843 843 }
844 844 }
845 845
846 846 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
847 847 /// of the give repository.
848 848 ///
849 849 /// This is similar to `SystemTime::now()`, with the result truncated to the
850 850 /// same time resolution as other files’ modification times. Using `.hg`
851 851 /// instead of the system’s default temporary directory (such as `/tmp`) makes
852 852 /// it more likely the temporary file is in the same disk partition as contents
853 853 /// of the working directory, which can matter since different filesystems may
854 854 /// store timestamps with different resolutions.
855 855 ///
856 856 /// This may fail, typically if we lack write permissions. In that case we
857 857 /// should continue the `status()` algoritm anyway and consider the current
858 858 /// date/time to be unknown.
859 859 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
860 860 tempfile::tempfile_in(repo_root.join(".hg"))?
861 861 .metadata()?
862 862 .modified()
863 863 }
@@ -1,13 +1,12 b''
1 1 skip ignored directories if -i or --all not specified
2 2
3 3 $ hg init t
4 4 $ cd t
5 5 $ echo 'ignored' > .hgignore
6 6 $ hg ci -qA -m init -d'2 0'
7 7 $ mkdir ignored
8
9 The better behavior here is the non-rust behavior, which is to keep
10 the directory and only delete it when -i or --all is given.
11
8 $ ls
9 ignored
12 10 $ hg purge -v --no-confirm
13 removing directory ignored (known-bad-output rust !)
11 $ ls
12 ignored
General Comments 0
You need to be logged in to leave comments. Login now