##// END OF EJS Templates
dirstate: fix the bug in [status] dealing with committed&ignored directories...
Arseniy Alekseyev -
r51224:edcc35a4 stable
parent child Browse files
Show More
@@ -1,1002 +1,1002 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::Matcher;
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_bytes_from_path;
14 14 use crate::utils::files::get_path_from_bytes;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::BadMatch;
17 17 use crate::DirstateStatus;
18 18 use crate::HgPathCow;
19 19 use crate::PatternFileWarning;
20 20 use crate::StatusError;
21 21 use crate::StatusOptions;
22 22 use once_cell::sync::OnceCell;
23 23 use rayon::prelude::*;
24 24 use sha1::{Digest, Sha1};
25 25 use std::borrow::Cow;
26 26 use std::io;
27 27 use std::path::Path;
28 28 use std::path::PathBuf;
29 29 use std::sync::Mutex;
30 30 use std::time::SystemTime;
31 31
32 32 /// Returns the status of the working directory compared to its parent
33 33 /// changeset.
34 34 ///
35 35 /// This algorithm is based on traversing the filesystem tree (`fs` in function
36 36 /// and variable names) and dirstate tree at the same time. The core of this
37 37 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
38 38 /// and its use of `itertools::merge_join_by`. When reaching a path that only
39 39 /// exists in one of the two trees, depending on information requested by
40 40 /// `options` we may need to traverse the remaining subtree.
41 41 #[logging_timer::time("trace")]
42 42 pub fn status<'dirstate>(
43 43 dmap: &'dirstate mut DirstateMap,
44 44 matcher: &(dyn Matcher + Sync),
45 45 root_dir: PathBuf,
46 46 ignore_files: Vec<PathBuf>,
47 47 options: StatusOptions,
48 48 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
49 49 {
50 50 // Force the global rayon threadpool to not exceed 16 concurrent threads.
51 51 // This is a stop-gap measure until we figure out why using more than 16
52 52 // threads makes `status` slower for each additional thread.
53 53 // We use `ok()` in case the global threadpool has already been
54 54 // instantiated in `rhg` or some other caller.
55 55 // TODO find the underlying cause and fix it, then remove this.
56 56 rayon::ThreadPoolBuilder::new()
57 57 .num_threads(16.min(rayon::current_num_threads()))
58 58 .build_global()
59 59 .ok();
60 60
61 61 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
62 62 if options.list_ignored || options.list_unknown {
63 63 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
64 64 DirstateVersion::V1 => {
65 65 let (ignore_fn, warnings) = get_ignore_function(
66 66 ignore_files,
67 67 &root_dir,
68 68 &mut |_source, _pattern_bytes| {},
69 69 )?;
70 70 (ignore_fn, warnings, None)
71 71 }
72 72 DirstateVersion::V2 => {
73 73 let mut hasher = Sha1::new();
74 74 let (ignore_fn, warnings) = get_ignore_function(
75 75 ignore_files,
76 76 &root_dir,
77 77 &mut |source, pattern_bytes| {
78 78 // If inside the repo, use the relative version to
79 79 // make it deterministic inside tests.
80 80 // The performance hit should be negligible.
81 81 let source = source
82 82 .strip_prefix(&root_dir)
83 83 .unwrap_or(source);
84 84 let source = get_bytes_from_path(source);
85 85
86 86 let mut subhasher = Sha1::new();
87 87 subhasher.update(pattern_bytes);
88 88 let patterns_hash = subhasher.finalize();
89 89
90 90 hasher.update(source);
91 91 hasher.update(b" ");
92 92 hasher.update(patterns_hash);
93 93 hasher.update(b"\n");
94 94 },
95 95 )?;
96 96 let new_hash = *hasher.finalize().as_ref();
97 97 let changed = new_hash != dmap.ignore_patterns_hash;
98 98 dmap.ignore_patterns_hash = new_hash;
99 99 (ignore_fn, warnings, Some(changed))
100 100 }
101 101 };
102 102 (ignore_fn, warnings, changed)
103 103 } else {
104 104 (Box::new(|&_| true), vec![], None)
105 105 };
106 106
107 107 let filesystem_time_at_status_start =
108 108 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
109 109
110 110 // If the repository is under the current directory, prefer using a
111 111 // relative path, so the kernel needs to traverse fewer directory in every
112 112 // call to `read_dir` or `symlink_metadata`.
113 113 // This is effective in the common case where the current directory is the
114 114 // repository root.
115 115
116 116 // TODO: Better yet would be to use libc functions like `openat` and
117 117 // `fstatat` to remove such repeated traversals entirely, but the standard
118 118 // library does not provide APIs based on those.
119 119 // Maybe with a crate like https://crates.io/crates/openat instead?
120 120 let root_dir = if let Some(relative) = std::env::current_dir()
121 121 .ok()
122 122 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
123 123 {
124 124 relative
125 125 } else {
126 126 &root_dir
127 127 };
128 128
129 129 let outcome = DirstateStatus {
130 130 filesystem_time_at_status_start,
131 131 ..Default::default()
132 132 };
133 133 let common = StatusCommon {
134 134 dmap,
135 135 options,
136 136 matcher,
137 137 ignore_fn,
138 138 outcome: Mutex::new(outcome),
139 139 ignore_patterns_have_changed: patterns_changed,
140 140 new_cacheable_directories: Default::default(),
141 141 outdated_cached_directories: Default::default(),
142 142 filesystem_time_at_status_start,
143 143 };
144 144 let is_at_repo_root = true;
145 145 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
146 146 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
147 147 let root_cached_mtime = None;
148 148 // If the path we have for the repository root is a symlink, do follow it.
149 149 // (As opposed to symlinks within the working directory which are not
150 150 // followed, using `std::fs::symlink_metadata`.)
151 151 common.traverse_fs_directory_and_dirstate(
152 152 &has_ignored_ancestor,
153 153 dmap.root.as_ref(),
154 154 hg_path,
155 155 &DirEntry {
156 156 hg_path: Cow::Borrowed(HgPath::new(b"")),
157 157 fs_path: Cow::Borrowed(root_dir),
158 158 symlink_metadata: None,
159 159 file_type: FakeFileType::Directory,
160 160 },
161 161 root_cached_mtime,
162 162 is_at_repo_root,
163 163 )?;
164 164 let mut outcome = common.outcome.into_inner().unwrap();
165 165 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
166 166 let outdated = common.outdated_cached_directories.into_inner().unwrap();
167 167
168 168 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
169 169 || !outdated.is_empty()
170 170 || (!new_cacheable.is_empty()
171 171 && dmap.dirstate_version == DirstateVersion::V2);
172 172
173 173 // Remove outdated mtimes before adding new mtimes, in case a given
174 174 // directory is both
175 175 for path in &outdated {
176 176 dmap.clear_cached_mtime(path)?;
177 177 }
178 178 for (path, mtime) in &new_cacheable {
179 179 dmap.set_cached_mtime(path, *mtime)?;
180 180 }
181 181
182 182 Ok((outcome, warnings))
183 183 }
184 184
185 185 /// Bag of random things needed by various parts of the algorithm. Reduces the
186 186 /// number of parameters passed to functions.
187 187 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
188 188 dmap: &'tree DirstateMap<'on_disk>,
189 189 options: StatusOptions,
190 190 matcher: &'a (dyn Matcher + Sync),
191 191 ignore_fn: IgnoreFnType<'a>,
192 192 outcome: Mutex<DirstateStatus<'on_disk>>,
193 193 /// New timestamps of directories to be used for caching their readdirs
194 194 new_cacheable_directories:
195 195 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
196 196 /// Used to invalidate the readdir cache of directories
197 197 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
198 198
199 199 /// Whether ignore files like `.hgignore` have changed since the previous
200 200 /// time a `status()` call wrote their hash to the dirstate. `None` means
201 201 /// we don’t know as this run doesn’t list either ignored or uknown files
202 202 /// and therefore isn’t reading `.hgignore`.
203 203 ignore_patterns_have_changed: Option<bool>,
204 204
205 205 /// The current time at the start of the `status()` algorithm, as measured
206 206 /// and possibly truncated by the filesystem.
207 207 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
208 208 }
209 209
/// The category a path is reported under. Each variant selects the list of
/// the same name in the accumulated `DirstateStatus`.
enum Outcome {
    /// Goes to the `modified` list
    Modified,
    /// Goes to the `added` list
    Added,
    /// Goes to the `removed` list
    Removed,
    /// Goes to the `deleted` list
    Deleted,
    /// Goes to the `clean` list
    Clean,
    /// Goes to the `ignored` list
    Ignored,
    /// Goes to the `unknown` list
    Unknown,
    /// Goes to the `unsure` list
    Unsure,
}
220 220
221 221 /// Lazy computation of whether a given path has a hgignored
222 222 /// ancestor.
223 223 struct HasIgnoredAncestor<'a> {
224 224 /// `path` and `parent` constitute the inputs to the computation,
225 225 /// `cache` stores the outcome.
226 226 path: &'a HgPath,
227 227 parent: Option<&'a HasIgnoredAncestor<'a>>,
228 228 cache: OnceCell<bool>,
229 229 }
230 230
231 231 impl<'a> HasIgnoredAncestor<'a> {
232 232 fn create(
233 233 parent: Option<&'a HasIgnoredAncestor<'a>>,
234 234 path: &'a HgPath,
235 235 ) -> HasIgnoredAncestor<'a> {
236 236 Self {
237 237 path,
238 238 parent,
239 239 cache: OnceCell::new(),
240 240 }
241 241 }
242 242
243 243 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
244 244 match self.parent {
245 245 None => false,
246 246 Some(parent) => {
247 *(parent.cache.get_or_init(|| {
247 *(self.cache.get_or_init(|| {
248 248 parent.force(ignore_fn) || ignore_fn(self.path)
249 249 }))
250 250 }
251 251 }
252 252 }
253 253 }
254 254
255 255 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
256 256 fn push_outcome(
257 257 &self,
258 258 which: Outcome,
259 259 dirstate_node: &NodeRef<'tree, 'on_disk>,
260 260 ) -> Result<(), DirstateV2ParseError> {
261 261 let path = dirstate_node
262 262 .full_path_borrowed(self.dmap.on_disk)?
263 263 .detach_from_tree();
264 264 let copy_source = if self.options.list_copies {
265 265 dirstate_node
266 266 .copy_source_borrowed(self.dmap.on_disk)?
267 267 .map(|source| source.detach_from_tree())
268 268 } else {
269 269 None
270 270 };
271 271 self.push_outcome_common(which, path, copy_source);
272 272 Ok(())
273 273 }
274 274
275 275 fn push_outcome_without_copy_source(
276 276 &self,
277 277 which: Outcome,
278 278 path: &BorrowedPath<'_, 'on_disk>,
279 279 ) {
280 280 self.push_outcome_common(which, path.detach_from_tree(), None)
281 281 }
282 282
283 283 fn push_outcome_common(
284 284 &self,
285 285 which: Outcome,
286 286 path: HgPathCow<'on_disk>,
287 287 copy_source: Option<HgPathCow<'on_disk>>,
288 288 ) {
289 289 let mut outcome = self.outcome.lock().unwrap();
290 290 let vec = match which {
291 291 Outcome::Modified => &mut outcome.modified,
292 292 Outcome::Added => &mut outcome.added,
293 293 Outcome::Removed => &mut outcome.removed,
294 294 Outcome::Deleted => &mut outcome.deleted,
295 295 Outcome::Clean => &mut outcome.clean,
296 296 Outcome::Ignored => &mut outcome.ignored,
297 297 Outcome::Unknown => &mut outcome.unknown,
298 298 Outcome::Unsure => &mut outcome.unsure,
299 299 };
300 300 vec.push(StatusPath { path, copy_source });
301 301 }
302 302
303 303 fn read_dir(
304 304 &self,
305 305 hg_path: &HgPath,
306 306 fs_path: &Path,
307 307 is_at_repo_root: bool,
308 308 ) -> Result<Vec<DirEntry>, ()> {
309 309 DirEntry::read_dir(fs_path, is_at_repo_root)
310 310 .map_err(|error| self.io_error(error, hg_path))
311 311 }
312 312
313 313 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
314 314 let errno = error.raw_os_error().expect("expected real OS error");
315 315 self.outcome
316 316 .lock()
317 317 .unwrap()
318 318 .bad
319 319 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
320 320 }
321 321
322 322 fn check_for_outdated_directory_cache(
323 323 &self,
324 324 dirstate_node: &NodeRef<'tree, 'on_disk>,
325 325 ) -> Result<bool, DirstateV2ParseError> {
326 326 if self.ignore_patterns_have_changed == Some(true)
327 327 && dirstate_node.cached_directory_mtime()?.is_some()
328 328 {
329 329 self.outdated_cached_directories.lock().unwrap().push(
330 330 dirstate_node
331 331 .full_path_borrowed(self.dmap.on_disk)?
332 332 .detach_from_tree(),
333 333 );
334 334 return Ok(true);
335 335 }
336 336 Ok(false)
337 337 }
338 338
339 339 /// If this returns true, we can get accurate results by only using
340 340 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
341 341 /// need to call `read_dir`.
342 342 fn can_skip_fs_readdir(
343 343 &self,
344 344 directory_entry: &DirEntry,
345 345 cached_directory_mtime: Option<TruncatedTimestamp>,
346 346 ) -> bool {
347 347 if !self.options.list_unknown && !self.options.list_ignored {
348 348 // All states that we care about listing have corresponding
349 349 // dirstate entries.
350 350 // This happens for example with `hg status -mard`.
351 351 return true;
352 352 }
353 353 if !self.options.list_ignored
354 354 && self.ignore_patterns_have_changed == Some(false)
355 355 {
356 356 if let Some(cached_mtime) = cached_directory_mtime {
357 357 // The dirstate contains a cached mtime for this directory, set
358 358 // by a previous run of the `status` algorithm which found this
359 359 // directory eligible for `read_dir` caching.
360 360 if let Ok(meta) = directory_entry.symlink_metadata() {
361 361 if cached_mtime
362 362 .likely_equal_to_mtime_of(&meta)
363 363 .unwrap_or(false)
364 364 {
365 365 // The mtime of that directory has not changed
366 366 // since then, which means that the results of
367 367 // `read_dir` should also be unchanged.
368 368 return true;
369 369 }
370 370 }
371 371 }
372 372 }
373 373 false
374 374 }
375 375
376 376 /// Returns whether all child entries of the filesystem directory have a
377 377 /// corresponding dirstate node or are ignored.
378 378 fn traverse_fs_directory_and_dirstate<'ancestor>(
379 379 &self,
380 380 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
381 381 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
382 382 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
383 383 directory_entry: &DirEntry,
384 384 cached_directory_mtime: Option<TruncatedTimestamp>,
385 385 is_at_repo_root: bool,
386 386 ) -> Result<bool, DirstateV2ParseError> {
387 387 if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
388 388 dirstate_nodes
389 389 .par_iter()
390 390 .map(|dirstate_node| {
391 391 let fs_path = &directory_entry.fs_path;
392 392 let fs_path = fs_path.join(get_path_from_bytes(
393 393 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
394 394 ));
395 395 match std::fs::symlink_metadata(&fs_path) {
396 396 Ok(fs_metadata) => {
397 397 let file_type =
398 398 match fs_metadata.file_type().try_into() {
399 399 Ok(file_type) => file_type,
400 400 Err(_) => return Ok(()),
401 401 };
402 402 let entry = DirEntry {
403 403 hg_path: Cow::Borrowed(
404 404 dirstate_node
405 405 .full_path(self.dmap.on_disk)?,
406 406 ),
407 407 fs_path: Cow::Borrowed(&fs_path),
408 408 symlink_metadata: Some(fs_metadata),
409 409 file_type,
410 410 };
411 411 self.traverse_fs_and_dirstate(
412 412 &entry,
413 413 dirstate_node,
414 414 has_ignored_ancestor,
415 415 )
416 416 }
417 417 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
418 418 self.traverse_dirstate_only(dirstate_node)
419 419 }
420 420 Err(error) => {
421 421 let hg_path =
422 422 dirstate_node.full_path(self.dmap.on_disk)?;
423 423 self.io_error(error, hg_path);
424 424 Ok(())
425 425 }
426 426 }
427 427 })
428 428 .collect::<Result<_, _>>()?;
429 429
430 430 // We don’t know, so conservatively say this isn’t the case
431 431 let children_all_have_dirstate_node_or_are_ignored = false;
432 432
433 433 return Ok(children_all_have_dirstate_node_or_are_ignored);
434 434 }
435 435
436 436 let readdir_succeeded;
437 437 let mut fs_entries = if let Ok(entries) = self.read_dir(
438 438 directory_hg_path,
439 439 &directory_entry.fs_path,
440 440 is_at_repo_root,
441 441 ) {
442 442 readdir_succeeded = true;
443 443 entries
444 444 } else {
445 445 // Treat an unreadable directory (typically because of insufficient
446 446 // permissions) like an empty directory. `self.read_dir` has
447 447 // already called `self.io_error` so a warning will be emitted.
448 448 // We still need to remember that there was an error so that we
449 449 // know not to cache this result.
450 450 readdir_succeeded = false;
451 451 Vec::new()
452 452 };
453 453
454 454 // `merge_join_by` requires both its input iterators to be sorted:
455 455
456 456 let dirstate_nodes = dirstate_nodes.sorted();
457 457 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
458 458 // https://github.com/rust-lang/rust/issues/34162
459 459 fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path));
460 460
461 461 // Propagate here any error that would happen inside the comparison
462 462 // callback below
463 463 for dirstate_node in &dirstate_nodes {
464 464 dirstate_node.base_name(self.dmap.on_disk)?;
465 465 }
466 466 itertools::merge_join_by(
467 467 dirstate_nodes,
468 468 &fs_entries,
469 469 |dirstate_node, fs_entry| {
470 470 // This `unwrap` never panics because we already propagated
471 471 // those errors above
472 472 dirstate_node
473 473 .base_name(self.dmap.on_disk)
474 474 .unwrap()
475 475 .cmp(&fs_entry.hg_path)
476 476 },
477 477 )
478 478 .par_bridge()
479 479 .map(|pair| {
480 480 use itertools::EitherOrBoth::*;
481 481 let has_dirstate_node_or_is_ignored = match pair {
482 482 Both(dirstate_node, fs_entry) => {
483 483 self.traverse_fs_and_dirstate(
484 484 fs_entry,
485 485 dirstate_node,
486 486 has_ignored_ancestor,
487 487 )?;
488 488 true
489 489 }
490 490 Left(dirstate_node) => {
491 491 self.traverse_dirstate_only(dirstate_node)?;
492 492 true
493 493 }
494 494 Right(fs_entry) => self.traverse_fs_only(
495 495 has_ignored_ancestor.force(&self.ignore_fn),
496 496 directory_hg_path,
497 497 fs_entry,
498 498 ),
499 499 };
500 500 Ok(has_dirstate_node_or_is_ignored)
501 501 })
502 502 .try_reduce(|| true, |a, b| Ok(a && b))
503 503 .map(|res| res && readdir_succeeded)
504 504 }
505 505
506 506 fn traverse_fs_and_dirstate<'ancestor>(
507 507 &self,
508 508 fs_entry: &DirEntry,
509 509 dirstate_node: NodeRef<'tree, 'on_disk>,
510 510 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
511 511 ) -> Result<(), DirstateV2ParseError> {
512 512 let outdated_dircache =
513 513 self.check_for_outdated_directory_cache(&dirstate_node)?;
514 514 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
515 515 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
516 516 if !file_or_symlink {
517 517 // If we previously had a file here, it was removed (with
518 518 // `hg rm` or similar) or deleted before it could be
519 519 // replaced by a directory or something else.
520 520 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
521 521 }
522 522 if fs_entry.is_dir() {
523 523 if self.options.collect_traversed_dirs {
524 524 self.outcome
525 525 .lock()
526 526 .unwrap()
527 527 .traversed
528 528 .push(hg_path.detach_from_tree())
529 529 }
530 530 let is_ignored = HasIgnoredAncestor::create(
531 531 Some(has_ignored_ancestor),
532 532 hg_path,
533 533 );
534 534 let is_at_repo_root = false;
535 535 let children_all_have_dirstate_node_or_are_ignored = self
536 536 .traverse_fs_directory_and_dirstate(
537 537 &is_ignored,
538 538 dirstate_node.children(self.dmap.on_disk)?,
539 539 hg_path,
540 540 fs_entry,
541 541 dirstate_node.cached_directory_mtime()?,
542 542 is_at_repo_root,
543 543 )?;
544 544 self.maybe_save_directory_mtime(
545 545 children_all_have_dirstate_node_or_are_ignored,
546 546 fs_entry,
547 547 dirstate_node,
548 548 outdated_dircache,
549 549 )?
550 550 } else {
551 551 if file_or_symlink && self.matcher.matches(hg_path) {
552 552 if let Some(entry) = dirstate_node.entry()? {
553 553 if !entry.any_tracked() {
554 554 // Forward-compat if we start tracking unknown/ignored
555 555 // files for caching reasons
556 556 self.mark_unknown_or_ignored(
557 557 has_ignored_ancestor.force(&self.ignore_fn),
558 558 hg_path,
559 559 );
560 560 }
561 561 if entry.added() {
562 562 self.push_outcome(Outcome::Added, &dirstate_node)?;
563 563 } else if entry.removed() {
564 564 self.push_outcome(Outcome::Removed, &dirstate_node)?;
565 565 } else if entry.modified() {
566 566 self.push_outcome(Outcome::Modified, &dirstate_node)?;
567 567 } else {
568 568 self.handle_normal_file(&dirstate_node, fs_entry)?;
569 569 }
570 570 } else {
571 571 // `node.entry.is_none()` indicates a "directory"
572 572 // node, but the filesystem has a file
573 573 self.mark_unknown_or_ignored(
574 574 has_ignored_ancestor.force(&self.ignore_fn),
575 575 hg_path,
576 576 );
577 577 }
578 578 }
579 579
580 580 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
581 581 {
582 582 self.traverse_dirstate_only(child_node)?
583 583 }
584 584 }
585 585 Ok(())
586 586 }
587 587
588 588 /// Save directory mtime if applicable.
589 589 ///
590 590 /// `outdated_directory_cache` is `true` if we've just invalidated the
591 591 /// cache for this directory in `check_for_outdated_directory_cache`,
592 592 /// which forces the update.
593 593 fn maybe_save_directory_mtime(
594 594 &self,
595 595 children_all_have_dirstate_node_or_are_ignored: bool,
596 596 directory_entry: &DirEntry,
597 597 dirstate_node: NodeRef<'tree, 'on_disk>,
598 598 outdated_directory_cache: bool,
599 599 ) -> Result<(), DirstateV2ParseError> {
600 600 if !children_all_have_dirstate_node_or_are_ignored {
601 601 return Ok(());
602 602 }
603 603 // All filesystem directory entries from `read_dir` have a
604 604 // corresponding node in the dirstate, so we can reconstitute the
605 605 // names of those entries without calling `read_dir` again.
606 606
607 607 // TODO: use let-else here and below when available:
608 608 // https://github.com/rust-lang/rust/issues/87335
609 609 let status_start = if let Some(status_start) =
610 610 &self.filesystem_time_at_status_start
611 611 {
612 612 status_start
613 613 } else {
614 614 return Ok(());
615 615 };
616 616
617 617 // Although the Rust standard library’s `SystemTime` type
618 618 // has nanosecond precision, the times reported for a
619 619 // directory’s (or file’s) modified time may have lower
620 620 // resolution based on the filesystem (for example ext3
621 621 // only stores integer seconds), kernel (see
622 622 // https://stackoverflow.com/a/14393315/1162888), etc.
623 623 let metadata = match directory_entry.symlink_metadata() {
624 624 Ok(meta) => meta,
625 625 Err(_) => return Ok(()),
626 626 };
627 627
628 628 let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of(
629 629 &metadata,
630 630 status_start,
631 631 ) {
632 632 Ok(Some(directory_mtime)) => directory_mtime,
633 633 Ok(None) => {
634 634 // The directory was modified too recently,
635 635 // don’t cache its `read_dir` results.
636 636 //
637 637 // 1. A change to this directory (direct child was
638 638 // added or removed) cause its mtime to be set
639 639 // (possibly truncated) to `directory_mtime`
640 640 // 2. This `status` algorithm calls `read_dir`
641 641 // 3. An other change is made to the same directory is
642 642 // made so that calling `read_dir` agin would give
643 643 // different results, but soon enough after 1. that
644 644 // the mtime stays the same
645 645 //
646 646 // On a system where the time resolution poor, this
647 647 // scenario is not unlikely if all three steps are caused
648 648 // by the same script.
649 649 return Ok(());
650 650 }
651 651 Err(_) => {
652 652 // OS/libc does not support mtime?
653 653 return Ok(());
654 654 }
655 655 };
656 656 // We’ve observed (through `status_start`) that time has
657 657 // “progressed” since `directory_mtime`, so any further
658 658 // change to this directory is extremely likely to cause a
659 659 // different mtime.
660 660 //
661 661 // Having the same mtime again is not entirely impossible
662 662 // since the system clock is not monotonous. It could jump
663 663 // backward to some point before `directory_mtime`, then a
664 664 // directory change could potentially happen during exactly
665 665 // the wrong tick.
666 666 //
667 667 // We deem this scenario (unlike the previous one) to be
668 668 // unlikely enough in practice.
669 669
670 670 let is_up_to_date = if let Some(cached) =
671 671 dirstate_node.cached_directory_mtime()?
672 672 {
673 673 !outdated_directory_cache && cached.likely_equal(directory_mtime)
674 674 } else {
675 675 false
676 676 };
677 677 if !is_up_to_date {
678 678 let hg_path = dirstate_node
679 679 .full_path_borrowed(self.dmap.on_disk)?
680 680 .detach_from_tree();
681 681 self.new_cacheable_directories
682 682 .lock()
683 683 .unwrap()
684 684 .push((hg_path, directory_mtime))
685 685 }
686 686 Ok(())
687 687 }
688 688
689 689 /// A file that is clean in the dirstate was found in the filesystem
690 690 fn handle_normal_file(
691 691 &self,
692 692 dirstate_node: &NodeRef<'tree, 'on_disk>,
693 693 fs_entry: &DirEntry,
694 694 ) -> Result<(), DirstateV2ParseError> {
695 695 // Keep the low 31 bits
696 696 fn truncate_u64(value: u64) -> i32 {
697 697 (value & 0x7FFF_FFFF) as i32
698 698 }
699 699
700 700 let fs_metadata = match fs_entry.symlink_metadata() {
701 701 Ok(meta) => meta,
702 702 Err(_) => return Ok(()),
703 703 };
704 704
705 705 let entry = dirstate_node
706 706 .entry()?
707 707 .expect("handle_normal_file called with entry-less node");
708 708 let mode_changed =
709 709 || self.options.check_exec && entry.mode_changed(&fs_metadata);
710 710 let size = entry.size();
711 711 let size_changed = size != truncate_u64(fs_metadata.len());
712 712 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
713 713 // issue6456: Size returned may be longer due to encryption
714 714 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
715 715 self.push_outcome(Outcome::Unsure, dirstate_node)?
716 716 } else if dirstate_node.has_copy_source()
717 717 || entry.is_from_other_parent()
718 718 || (size >= 0 && (size_changed || mode_changed()))
719 719 {
720 720 self.push_outcome(Outcome::Modified, dirstate_node)?
721 721 } else {
722 722 let mtime_looks_clean = if let Some(dirstate_mtime) =
723 723 entry.truncated_mtime()
724 724 {
725 725 let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata)
726 726 .expect("OS/libc does not support mtime?");
727 727 // There might be a change in the future if for example the
728 728 // internal clock become off while process run, but this is a
729 729 // case where the issues the user would face
730 730 // would be a lot worse and there is nothing we
731 731 // can really do.
732 732 fs_mtime.likely_equal(dirstate_mtime)
733 733 } else {
734 734 // No mtime in the dirstate entry
735 735 false
736 736 };
737 737 if !mtime_looks_clean {
738 738 self.push_outcome(Outcome::Unsure, dirstate_node)?
739 739 } else if self.options.list_clean {
740 740 self.push_outcome(Outcome::Clean, dirstate_node)?
741 741 }
742 742 }
743 743 Ok(())
744 744 }
745 745
746 746 /// A node in the dirstate tree has no corresponding filesystem entry
747 747 fn traverse_dirstate_only(
748 748 &self,
749 749 dirstate_node: NodeRef<'tree, 'on_disk>,
750 750 ) -> Result<(), DirstateV2ParseError> {
751 751 self.check_for_outdated_directory_cache(&dirstate_node)?;
752 752 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
753 753 dirstate_node
754 754 .children(self.dmap.on_disk)?
755 755 .par_iter()
756 756 .map(|child_node| self.traverse_dirstate_only(child_node))
757 757 .collect()
758 758 }
759 759
760 760 /// A node in the dirstate tree has no corresponding *file* on the
761 761 /// filesystem
762 762 ///
763 763 /// Does nothing on a "directory" node
764 764 fn mark_removed_or_deleted_if_file(
765 765 &self,
766 766 dirstate_node: &NodeRef<'tree, 'on_disk>,
767 767 ) -> Result<(), DirstateV2ParseError> {
768 768 if let Some(entry) = dirstate_node.entry()? {
769 769 if !entry.any_tracked() {
770 770 // Future-compat for when we start storing ignored and unknown
771 771 // files for caching reasons
772 772 return Ok(());
773 773 }
774 774 let path = dirstate_node.full_path(self.dmap.on_disk)?;
775 775 if self.matcher.matches(path) {
776 776 if entry.removed() {
777 777 self.push_outcome(Outcome::Removed, dirstate_node)?
778 778 } else {
779 779 self.push_outcome(Outcome::Deleted, dirstate_node)?
780 780 }
781 781 }
782 782 }
783 783 Ok(())
784 784 }
785 785
786 786 /// Something in the filesystem has no corresponding dirstate node
787 787 ///
788 788 /// Returns whether that path is ignored
789 789 fn traverse_fs_only(
790 790 &self,
791 791 has_ignored_ancestor: bool,
792 792 directory_hg_path: &HgPath,
793 793 fs_entry: &DirEntry,
794 794 ) -> bool {
795 795 let hg_path = directory_hg_path.join(&fs_entry.hg_path);
796 796 let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink();
797 797 if fs_entry.is_dir() {
798 798 let is_ignored =
799 799 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
800 800 let traverse_children = if is_ignored {
801 801 // Descendants of an ignored directory are all ignored
802 802 self.options.list_ignored
803 803 } else {
804 804 // Descendants of an unknown directory may be either unknown or
805 805 // ignored
806 806 self.options.list_unknown || self.options.list_ignored
807 807 };
808 808 if traverse_children {
809 809 let is_at_repo_root = false;
810 810 if let Ok(children_fs_entries) =
811 811 self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root)
812 812 {
813 813 children_fs_entries.par_iter().for_each(|child_fs_entry| {
814 814 self.traverse_fs_only(
815 815 is_ignored,
816 816 &hg_path,
817 817 child_fs_entry,
818 818 );
819 819 })
820 820 }
821 821 if self.options.collect_traversed_dirs {
822 822 self.outcome.lock().unwrap().traversed.push(hg_path.into())
823 823 }
824 824 }
825 825 is_ignored
826 826 } else if file_or_symlink {
827 827 if self.matcher.matches(&hg_path) {
828 828 self.mark_unknown_or_ignored(
829 829 has_ignored_ancestor,
830 830 &BorrowedPath::InMemory(&hg_path),
831 831 )
832 832 } else {
833 833 // We haven’t computed whether this path is ignored. It
834 834 // might not be, and a future run of status might have a
835 835 // different matcher that matches it. So treat it as not
836 836 // ignored. That is, inhibit readdir caching of the parent
837 837 // directory.
838 838 false
839 839 }
840 840 } else {
841 841 // This is neither a directory, a plain file, or a symlink.
842 842 // Treat it like an ignored file.
843 843 true
844 844 }
845 845 }
846 846
847 847 /// Returns whether that path is ignored
848 848 fn mark_unknown_or_ignored(
849 849 &self,
850 850 has_ignored_ancestor: bool,
851 851 hg_path: &BorrowedPath<'_, 'on_disk>,
852 852 ) -> bool {
853 853 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
854 854 if is_ignored {
855 855 if self.options.list_ignored {
856 856 self.push_outcome_without_copy_source(
857 857 Outcome::Ignored,
858 858 hg_path,
859 859 )
860 860 }
861 861 } else if self.options.list_unknown {
862 862 self.push_outcome_without_copy_source(Outcome::Unknown, hg_path)
863 863 }
864 864 is_ignored
865 865 }
866 866 }
867 867
/// Stand-in for [`std::fs::FileType`], which cannot be constructed directly.
/// Only the kinds of files we care about are represented.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum FakeFileType {
    File,
    Directory,
    Symlink,
}

impl TryFrom<std::fs::FileType> for FakeFileType {
    type Error = ();

    /// Maps a real file type to its emulated counterpart, or fails with
    /// `Err(())` for anything that is not a directory, file or symlink.
    fn try_from(f: std::fs::FileType) -> Result<Self, Self::Error> {
        let kind = match (f.is_dir(), f.is_file(), f.is_symlink()) {
            (true, _, _) => Self::Directory,
            (false, true, _) => Self::File,
            (false, false, true) => Self::Symlink,
            // Things like FIFO etc.
            (false, false, false) => return Err(()),
        };
        Ok(kind)
    }
}
893 893
894 894 struct DirEntry<'a> {
895 895 /// Path as stored in the dirstate, or just the filename for optimization.
896 896 hg_path: HgPathCow<'a>,
897 897 /// Filesystem path
898 898 fs_path: Cow<'a, Path>,
899 899 /// Lazily computed
900 900 symlink_metadata: Option<std::fs::Metadata>,
901 901 /// Already computed for ergonomics.
902 902 file_type: FakeFileType,
903 903 }
904 904
905 905 impl<'a> DirEntry<'a> {
906 906 /// Returns **unsorted** entries in the given directory, with name,
907 907 /// metadata and file type.
908 908 ///
909 909 /// If a `.hg` sub-directory is encountered:
910 910 ///
911 911 /// * At the repository root, ignore that sub-directory
912 912 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
913 913 /// list instead.
914 914 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
915 915 // `read_dir` returns a "not found" error for the empty path
916 916 let at_cwd = path == Path::new("");
917 917 let read_dir_path = if at_cwd { Path::new(".") } else { path };
918 918 let mut results = Vec::new();
919 919 for entry in read_dir_path.read_dir()? {
920 920 let entry = entry?;
921 921 let file_type = match entry.file_type() {
922 922 Ok(v) => v,
923 923 Err(e) => {
924 924 // race with file deletion?
925 925 if e.kind() == std::io::ErrorKind::NotFound {
926 926 continue;
927 927 } else {
928 928 return Err(e);
929 929 }
930 930 }
931 931 };
932 932 let file_name = entry.file_name();
933 933 // FIXME don't do this when cached
934 934 if file_name == ".hg" {
935 935 if is_at_repo_root {
936 936 // Skip the repo’s own .hg (might be a symlink)
937 937 continue;
938 938 } else if file_type.is_dir() {
939 939 // A .hg sub-directory at another location means a subrepo,
940 940 // skip it entirely.
941 941 return Ok(Vec::new());
942 942 }
943 943 }
944 944 let full_path = if at_cwd {
945 945 file_name.clone().into()
946 946 } else {
947 947 entry.path()
948 948 };
949 949 let filename =
950 950 Cow::Owned(get_bytes_from_os_string(file_name).into());
951 951 let file_type = match FakeFileType::try_from(file_type) {
952 952 Ok(file_type) => file_type,
953 953 Err(_) => continue,
954 954 };
955 955 results.push(DirEntry {
956 956 hg_path: filename,
957 957 fs_path: Cow::Owned(full_path.to_path_buf()),
958 958 symlink_metadata: None,
959 959 file_type,
960 960 })
961 961 }
962 962 Ok(results)
963 963 }
964 964
965 965 fn symlink_metadata(&self) -> Result<std::fs::Metadata, std::io::Error> {
966 966 match &self.symlink_metadata {
967 967 Some(meta) => Ok(meta.clone()),
968 968 None => std::fs::symlink_metadata(&self.fs_path),
969 969 }
970 970 }
971 971
972 972 fn is_dir(&self) -> bool {
973 973 self.file_type == FakeFileType::Directory
974 974 }
975 975
976 976 fn is_file(&self) -> bool {
977 977 self.file_type == FakeFileType::File
978 978 }
979 979
980 980 fn is_symlink(&self) -> bool {
981 981 self.file_type == FakeFileType::Symlink
982 982 }
983 983 }
984 984
985 985 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
986 986 /// of the give repository.
987 987 ///
988 988 /// This is similar to `SystemTime::now()`, with the result truncated to the
989 989 /// same time resolution as other files’ modification times. Using `.hg`
990 990 /// instead of the system’s default temporary directory (such as `/tmp`) makes
991 991 /// it more likely the temporary file is in the same disk partition as contents
992 992 /// of the working directory, which can matter since different filesystems may
993 993 /// store timestamps with different resolutions.
994 994 ///
995 995 /// This may fail, typically if we lack write permissions. In that case we
996 996 /// should continue the `status()` algoritm anyway and consider the current
997 997 /// date/time to be unknown.
998 998 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
999 999 tempfile::tempfile_in(repo_root.join(".hg"))?
1000 1000 .metadata()?
1001 1001 .modified()
1002 1002 }
@@ -1,35 +1,33 b''
1 1 #testcases dirstate-v1 dirstate-v2
2 2
3 3 #if dirstate-v2
4 4 $ cat >> $HGRCPATH << EOF
5 5 > [format]
6 6 > use-dirstate-v2=1
7 7 > [storage]
8 8 > dirstate-v2.slow-path=allow
9 9 > EOF
10 10 #endif
11 11
12 12 $ rm -rf r
13 13
14 14 $ hg init r
15 15 $ cd r
16 16 $ mkdir d1
17 17 $ mkdir d2
18 18 $ touch d1/f d2/f
19 19 $ hg commit -Am '.'
20 20 adding d1/f
21 21 adding d2/f
22 22 $ echo 'syntax:re' >> .hgignore
23 23 $ echo '^d1$' >> .hgignore
24 24 $ hg commit -Am "ignore d1"
25 25 adding .hgignore
26 26
27 27 Now d1 is a directory that's both committed and ignored.
28 28 Untracked files in d2 are still shown, but ones in d1 are ignored:
29 29
30 30 $ touch d1/g
31 31 $ touch d2/g
32 32 $ RAYON_NUM_THREADS=1 hg status
33 ? d2/g (no-rust no-rhg !)
34
35 ^ BUG: d2/g does not show up with rust status
33 ? d2/g
General Comments 0
You need to be logged in to leave comments. Login now