##// END OF EJS Templates
rust-status: fix typos and add docstrings to dircache related fields
Raphaël Gomès -
r50449:ecf9788c stable
parent child Browse files
Show More
@@ -1,901 +1,903 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::Matcher;
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_path_from_bytes;
14 14 use crate::utils::hg_path::HgPath;
15 15 use crate::BadMatch;
16 16 use crate::DirstateStatus;
17 17 use crate::HgPathBuf;
18 18 use crate::HgPathCow;
19 19 use crate::PatternFileWarning;
20 20 use crate::StatusError;
21 21 use crate::StatusOptions;
22 22 use micro_timer::timed;
23 23 use once_cell::sync::OnceCell;
24 24 use rayon::prelude::*;
25 25 use sha1::{Digest, Sha1};
26 26 use std::borrow::Cow;
27 27 use std::io;
28 28 use std::path::Path;
29 29 use std::path::PathBuf;
30 30 use std::sync::Mutex;
31 31 use std::time::SystemTime;
32 32
33 33 /// Returns the status of the working directory compared to its parent
34 34 /// changeset.
35 35 ///
36 36 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 37 /// and variable names) and dirstate tree at the same time. The core of this
38 38 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 39 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 40 /// exists in one of the two trees, depending on information requested by
41 41 /// `options` we may need to traverse the remaining subtree.
///
/// On success, returns the computed `DirstateStatus` together with the
/// warnings collected by `get_ignore_function` (an empty list when neither
/// unknown nor ignored files are listed).
///
/// As a side effect `dmap` is updated: `ignore_patterns_hash` is refreshed
/// (dirstate-v2 only), outdated cached directory mtimes are cleared and newly
/// cacheable ones are set.
42 42 #[timed]
43 43 pub fn status<'dirstate>(
44 44 dmap: &'dirstate mut DirstateMap,
45 45 matcher: &(dyn Matcher + Sync),
46 46 root_dir: PathBuf,
47 47 ignore_files: Vec<PathBuf>,
48 48 options: StatusOptions,
49 49 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
50 50 {
51 51 // Force the global rayon threadpool to not exceed 16 concurrent threads.
52 52 // This is a stop-gap measure until we figure out why using more than 16
53 53 // threads makes `status` slower for each additional thread.
54 54 // We use `ok()` in case the global threadpool has already been
55 55 // instantiated in `rhg` or some other caller.
56 56 // TODO find the underlying cause and fix it, then remove this.
57 57 rayon::ThreadPoolBuilder::new()
58 58 .num_threads(16)
59 59 .build_global()
60 60 .ok();
61 61
// The ignore function is only built when unknown or ignored files are
// listed; otherwise a stub answering `true` for every path is used and
// `patterns_changed` stays `None`.
62 62 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
63 63 if options.list_ignored || options.list_unknown {
64 64 let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
65 65 DirstateVersion::V1 => {
66 66 let (ignore_fn, warnings) = get_ignore_function(
67 67 ignore_files,
68 68 &root_dir,
69 69 &mut |_pattern_bytes| {},
70 70 )?;
71 71 (ignore_fn, warnings, None)
72 72 }
73 73 DirstateVersion::V2 => {
// Hash the pattern bytes with SHA-1 while they are being read, then
// compare with (and replace) the hash stored in the dirstate map to
// learn whether the ignore patterns changed.
74 74 let mut hasher = Sha1::new();
75 75 let (ignore_fn, warnings) = get_ignore_function(
76 76 ignore_files,
77 77 &root_dir,
78 78 &mut |pattern_bytes| hasher.update(pattern_bytes),
79 79 )?;
80 80 let new_hash = *hasher.finalize().as_ref();
81 81 let changed = new_hash != dmap.ignore_patterns_hash;
82 82 dmap.ignore_patterns_hash = new_hash;
83 83 (ignore_fn, warnings, Some(changed))
84 84 }
85 85 };
86 86 (ignore_fn, warnings, changed)
87 87 } else {
88 88 (Box::new(|&_| true), vec![], None)
89 89 };
90 90
// `None` when `filesystem_now` fails; the run continues without it.
91 91 let filesystem_time_at_status_start =
92 92 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
93 93
94 94 // If the repository is under the current directory, prefer using a
95 95 // relative path, so the kernel needs to traverse fewer directories in every
96 96 // call to `read_dir` or `symlink_metadata`.
97 97 // This is effective in the common case where the current directory is the
98 98 // repository root.
99 99
100 100 // TODO: Better yet would be to use libc functions like `openat` and
101 101 // `fstatat` to remove such repeated traversals entirely, but the standard
102 102 // library does not provide APIs based on those.
103 103 // Maybe with a crate like https://crates.io/crates/openat instead?
104 104 let root_dir = if let Some(relative) = std::env::current_dir()
105 105 .ok()
106 106 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
107 107 {
108 108 relative
109 109 } else {
110 110 &root_dir
111 111 };
112 112
113 113 let outcome = DirstateStatus {
114 114 filesystem_time_at_status_start,
115 115 ..Default::default()
116 116 };
117 117 let common = StatusCommon {
118 118 dmap,
119 119 options,
120 120 matcher,
121 121 ignore_fn,
122 122 outcome: Mutex::new(outcome),
123 123 ignore_patterns_have_changed: patterns_changed,
124 new_cachable_directories: Default::default(),
125 outated_cached_directories: Default::default(),
124 new_cacheable_directories: Default::default(),
125 outdated_cached_directories: Default::default(),
126 126 filesystem_time_at_status_start,
127 127 };
// The traversal starts at the repository root: empty hg path, no ignored
// ancestor, no cached mtime and no pre-fetched metadata.
128 128 let is_at_repo_root = true;
129 129 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
130 130 let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path);
131 131 let root_cached_mtime = None;
132 132 let root_dir_metadata = None;
133 133 // If the path we have for the repository root is a symlink, do follow it.
134 134 // (As opposed to symlinks within the working directory which are not
135 135 // followed, using `std::fs::symlink_metadata`.)
136 136 common.traverse_fs_directory_and_dirstate(
137 137 &has_ignored_ancestor,
138 138 dmap.root.as_ref(),
139 139 hg_path,
140 140 &root_dir,
141 141 root_dir_metadata,
142 142 root_cached_mtime,
143 143 is_at_repo_root,
144 144 )?;
// The traversal is over: take the accumulated results out of their mutexes.
145 145 let mut outcome = common.outcome.into_inner().unwrap();
146 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
147 let outdated = common.outated_cached_directories.into_inner().unwrap();
146 let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
147 let outdated = common.outdated_cached_directories.into_inner().unwrap();
148 148
// Flag the outcome as dirty when the ignore patterns changed, when cached
// directory mtimes were invalidated, or (dirstate-v2 only) when there are
// new directory mtimes to cache.
149 149 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
150 150 || !outdated.is_empty()
151 || (!new_cachable.is_empty()
151 || (!new_cacheable.is_empty()
152 152 && dmap.dirstate_version == DirstateVersion::V2);
153 153
154 154 // Remove outdated mtimes before adding new mtimes, in case a given
155 155 // directory is both
156 156 for path in &outdated {
157 157 dmap.clear_cached_mtime(path)?;
158 158 }
159 for (path, mtime) in &new_cachable {
159 for (path, mtime) in &new_cacheable {
160 160 dmap.set_cached_mtime(path, *mtime)?;
161 161 }
162 162
163 163 Ok((outcome, warnings))
164 164 }
165 165
166 166 /// Bag of random things needed by various parts of the algorithm. Reduces the
167 167 /// number of parameters passed to functions.
168 168 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
/// The dirstate tree that is compared against the filesystem.
169 169 dmap: &'tree DirstateMap<'on_disk>,
/// Options of this `status()` run (which categories to list, whether to
/// check the exec bit, whether to collect traversed directories, ...).
170 170 options: StatusOptions,
/// Only file paths accepted by this matcher are reported.
171 171 matcher: &'a (dyn Matcher + Sync),
/// Predicate telling whether a given path is ignored.
172 172 ignore_fn: IgnoreFnType<'a>,
/// Results accumulated so far. Behind a `Mutex` because entries are pushed
/// from closures run by parallel iterators.
173 173 outcome: Mutex<DirstateStatus<'on_disk>>,
174 new_cachable_directories:
174 /// New timestamps of directories to be used for caching their readdirs
175 new_cacheable_directories:
175 176 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
176 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
177 /// Used to invalidate the readdir cache of directories
178 outdated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
177 179
178 180 /// Whether ignore files like `.hgignore` have changed since the previous
179 181 /// time a `status()` call wrote their hash to the dirstate. `None` means
180 182 /// we don’t know as this run doesn’t list either ignored or unknown files
181 183 /// and therefore isn’t reading `.hgignore`.
182 184 ignore_patterns_have_changed: Option<bool>,
183 185
184 186 /// The current time at the start of the `status()` algorithm, as measured
185 187 /// and possibly truncated by the filesystem.
186 188 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
187 189 }
188 190
/// Status category of a path. Each variant selects the list of
/// `DirstateStatus` that `push_outcome_common` appends the path to.
189 191 enum Outcome {
190 192 Modified,
191 193 Added,
192 194 Removed,
193 195 Deleted,
194 196 Clean,
195 197 Ignored,
196 198 Unknown,
197 199 Unsure,
198 200 }
199 201
200 202 /// Lazy computation of whether a given path has a hgignored
201 203 /// ancestor.
///
/// One instance is created per traversed directory and handed to the
/// traversal of that directory's children, which call `force` on it only when
/// they actually need the answer.
202 204 struct HasIgnoredAncestor<'a> {
203 205 /// `path` and `parent` constitute the inputs to the computation,
204 206 /// `cache` stores the outcome.
205 207 path: &'a HgPath,
206 208 parent: Option<&'a HasIgnoredAncestor<'a>>,
207 209 cache: OnceCell<bool>,
208 210 }
209 211
210 212 impl<'a> HasIgnoredAncestor<'a> {
/// Creates a not-yet-computed value for the directory at `path`.
///
/// `parent` is the value of the enclosing directory, or `None` for the
/// repository root.
211 213 fn create(
212 214 parent: Option<&'a HasIgnoredAncestor<'a>>,
213 215 path: &'a HgPath,
214 216 ) -> HasIgnoredAncestor<'a> {
215 217 Self {
216 218 path,
217 219 parent,
218 220 cache: OnceCell::new(),
219 221 }
220 222 }
221 223
/// Returns whether `self.path` is ignored according to `ignore_fn` or has
/// an ignored ancestor. An instance without parent always yields `false`.
///
/// The answer is computed at most once and memoized in this instance's own
/// `cache`. It depends on `self.path`, so it must not be stored in the
/// parent's cell: that cell is shared by every sibling directory, and the
/// first sibling to be forced would decide the answer for all of them.
222 224 fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool {
223 225 match self.parent {
224 226 None => false,
225 227 Some(parent) => {
226 228 *(self.cache.get_or_init(|| {
227 229 parent.force(ignore_fn) || ignore_fn(&self.path)
228 230 }))
229 231 }
230 232 }
231 233 }
232 234 }
233 235
234 236 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
/// Records the full path of `dirstate_node` in the `which` list of the
/// outcome, together with its copy source when `options.list_copies` is
/// set.
///
/// Errors come from reading the node's path or copy source.
235 237 fn push_outcome(
236 238 &self,
237 239 which: Outcome,
238 240 dirstate_node: &NodeRef<'tree, 'on_disk>,
239 241 ) -> Result<(), DirstateV2ParseError> {
240 242 let path = dirstate_node
241 243 .full_path_borrowed(self.dmap.on_disk)?
242 244 .detach_from_tree();
// The copy source is only looked up when the caller asked for copies.
243 245 let copy_source = if self.options.list_copies {
244 246 dirstate_node
245 247 .copy_source_borrowed(self.dmap.on_disk)?
246 248 .map(|source| source.detach_from_tree())
247 249 } else {
248 250 None
249 251 };
250 252 self.push_outcome_common(which, path, copy_source);
251 253 Ok(())
252 254 }
253 255
/// Records `path` in the `which` list of the outcome, with no copy source.
254 256 fn push_outcome_without_copy_source(
255 257 &self,
256 258 which: Outcome,
257 259 path: &BorrowedPath<'_, 'on_disk>,
258 260 ) {
259 261 self.push_outcome_common(which, path.detach_from_tree(), None)
260 262 }
261 263
/// Locks `self.outcome` and appends a `StatusPath` made of `path` and
/// `copy_source` to the list selected by `which`.
262 264 fn push_outcome_common(
263 265 &self,
264 266 which: Outcome,
265 267 path: HgPathCow<'on_disk>,
266 268 copy_source: Option<HgPathCow<'on_disk>>,
267 269 ) {
268 270 let mut outcome = self.outcome.lock().unwrap();
269 271 let vec = match which {
270 272 Outcome::Modified => &mut outcome.modified,
271 273 Outcome::Added => &mut outcome.added,
272 274 Outcome::Removed => &mut outcome.removed,
273 275 Outcome::Deleted => &mut outcome.deleted,
274 276 Outcome::Clean => &mut outcome.clean,
275 277 Outcome::Ignored => &mut outcome.ignored,
276 278 Outcome::Unknown => &mut outcome.unknown,
277 279 Outcome::Unsure => &mut outcome.unsure,
278 280 };
279 281 vec.push(StatusPath { path, copy_source });
280 282 }
281 283
/// Lists the entries of `fs_path` with `DirEntry::read_dir`.
///
/// On an I/O error, the error is recorded against `hg_path` through
/// `io_error` and `Err(())` is returned.
282 284 fn read_dir(
283 285 &self,
284 286 hg_path: &HgPath,
285 287 fs_path: &Path,
286 288 is_at_repo_root: bool,
287 289 ) -> Result<Vec<DirEntry>, ()> {
288 290 DirEntry::read_dir(fs_path, is_at_repo_root)
289 291 .map_err(|error| self.io_error(error, hg_path))
290 292 }
291 293
/// Records an I/O error that happened on `hg_path` in the `bad` list of the
/// outcome, as a `BadMatch::OsError` carrying the OS error code.
///
/// Panics if `error` has no raw OS error code.
292 294 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
293 295 let errno = error.raw_os_error().expect("expected real OS error");
294 296 self.outcome
295 297 .lock()
296 298 .unwrap()
297 299 .bad
298 300 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
299 301 }
300 302
/// Queues the path of `dirstate_node` in the list of outdated cached
/// directories when the ignore patterns are known to have changed and the
/// node has a cached directory mtime.
///
/// `status()` later clears the cached mtime of every queued path.
301 303 fn check_for_outdated_directory_cache(
302 304 &self,
303 305 dirstate_node: &NodeRef<'tree, 'on_disk>,
304 306 ) -> Result<(), DirstateV2ParseError> {
305 307 if self.ignore_patterns_have_changed == Some(true)
306 308 && dirstate_node.cached_directory_mtime()?.is_some()
307 309 {
308 self.outated_cached_directories.lock().unwrap().push(
310 self.outdated_cached_directories.lock().unwrap().push(
309 311 dirstate_node
310 312 .full_path_borrowed(self.dmap.on_disk)?
311 313 .detach_from_tree(),
312 314 )
313 315 }
314 316 Ok(())
315 317 }
316 318
317 319 /// If this returns true, we can get accurate results by only using
318 320 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
319 321 /// need to call `read_dir`.
///
/// `directory_metadata` is the filesystem metadata of the directory, if
/// already known, and `cached_directory_mtime` is the mtime stored for it
/// in the dirstate, if any.
320 322 fn can_skip_fs_readdir(
321 323 &self,
322 324 directory_metadata: Option<&std::fs::Metadata>,
323 325 cached_directory_mtime: Option<TruncatedTimestamp>,
324 326 ) -> bool {
325 327 if !self.options.list_unknown && !self.options.list_ignored {
326 328 // All states that we care about listing have corresponding
327 329 // dirstate entries.
328 330 // This happens for example with `hg status -mard`.
329 331 return true;
330 332 }
// The cached mtime is only relied upon when ignored files are not listed
// and the ignore patterns are known to be unchanged.
331 333 if !self.options.list_ignored
332 334 && self.ignore_patterns_have_changed == Some(false)
333 335 {
334 336 if let Some(cached_mtime) = cached_directory_mtime {
335 337 // The dirstate contains a cached mtime for this directory, set
336 338 // by a previous run of the `status` algorithm which found this
337 339 // directory eligible for `read_dir` caching.
338 340 if let Some(meta) = directory_metadata {
// A failed comparison counts as "not equal".
339 341 if cached_mtime
340 342 .likely_equal_to_mtime_of(meta)
341 343 .unwrap_or(false)
342 344 {
343 345 // The mtime of that directory has not changed
344 346 // since then, which means that the results of
345 347 // `read_dir` should also be unchanged.
346 348 return true;
347 349 }
348 350 }
349 351 }
350 352 }
351 353 false
352 354 }
353 355
354 356 /// Returns whether all child entries of the filesystem directory have a
355 357 /// corresponding dirstate node or are ignored.
///
/// Two strategies are used:
///
/// * When `can_skip_fs_readdir` allows it, the directory is not listed:
///   each child dirstate node is looked up on the filesystem with
///   `symlink_metadata` (in parallel), and `false` is returned.
/// * Otherwise the directory is listed, then the sorted dirstate nodes and
///   the sorted filesystem entries are merged by base name. Each name is
///   dispatched to `traverse_fs_and_dirstate`, `traverse_dirstate_only` or
///   `traverse_fs_only` depending on where it exists.
356 358 fn traverse_fs_directory_and_dirstate<'ancestor>(
357 359 &self,
358 360 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
359 361 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
360 362 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
361 363 directory_fs_path: &Path,
362 364 directory_metadata: Option<&std::fs::Metadata>,
363 365 cached_directory_mtime: Option<TruncatedTimestamp>,
364 366 is_at_repo_root: bool,
365 367 ) -> Result<bool, DirstateV2ParseError> {
366 368 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
367 369 {
368 370 dirstate_nodes
369 371 .par_iter()
370 372 .map(|dirstate_node| {
371 373 let fs_path = directory_fs_path.join(get_path_from_bytes(
372 374 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
373 375 ));
374 376 match std::fs::symlink_metadata(&fs_path) {
375 377 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
376 378 &fs_path,
377 379 &fs_metadata,
378 380 dirstate_node,
379 381 has_ignored_ancestor,
380 382 ),
// The node has no filesystem counterpart.
381 383 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
382 384 self.traverse_dirstate_only(dirstate_node)
383 385 }
// Any other error is reported as a bad match; the traversal goes on.
384 386 Err(error) => {
385 387 let hg_path =
386 388 dirstate_node.full_path(self.dmap.on_disk)?;
387 389 Ok(self.io_error(error, hg_path))
388 390 }
389 391 }
390 392 })
391 393 .collect::<Result<_, _>>()?;
392 394
393 395 // We don’t know, so conservatively say this isn’t the case
394 396 let children_all_have_dirstate_node_or_are_ignored = false;
395 397
396 398 return Ok(children_all_have_dirstate_node_or_are_ignored);
397 399 }
398 400
399 401 let mut fs_entries = if let Ok(entries) = self.read_dir(
400 402 directory_hg_path,
401 403 directory_fs_path,
402 404 is_at_repo_root,
403 405 ) {
404 406 entries
405 407 } else {
406 408 // Treat an unreadable directory (typically because of insufficient
407 409 // permissions) like an empty directory. `self.read_dir` has
408 410 // already called `self.io_error` so a warning will be emitted.
409 411 Vec::new()
410 412 };
411 413
412 414 // `merge_join_by` requires both its input iterators to be sorted:
413 415
414 416 let dirstate_nodes = dirstate_nodes.sorted();
415 417 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
416 418 // https://github.com/rust-lang/rust/issues/34162
417 419 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
418 420
419 421 // Propagate here any error that would happen inside the comparison
420 422 // callback below
421 423 for dirstate_node in &dirstate_nodes {
422 424 dirstate_node.base_name(self.dmap.on_disk)?;
423 425 }
424 426 itertools::merge_join_by(
425 427 dirstate_nodes,
426 428 &fs_entries,
427 429 |dirstate_node, fs_entry| {
428 430 // This `unwrap` never panics because we already propagated
429 431 // those errors above
430 432 dirstate_node
431 433 .base_name(self.dmap.on_disk)
432 434 .unwrap()
433 435 .cmp(&fs_entry.base_name)
434 436 },
435 437 )
436 438 .par_bridge()
437 439 .map(|pair| {
438 440 use itertools::EitherOrBoth::*;
439 441 let has_dirstate_node_or_is_ignored;
440 442 match pair {
// The name exists both in the dirstate and on the filesystem.
441 443 Both(dirstate_node, fs_entry) => {
442 444 self.traverse_fs_and_dirstate(
443 445 &fs_entry.full_path,
444 446 &fs_entry.metadata,
445 447 dirstate_node,
446 448 has_ignored_ancestor,
447 449 )?;
448 450 has_dirstate_node_or_is_ignored = true
449 451 }
// The name only exists in the dirstate.
450 452 Left(dirstate_node) => {
451 453 self.traverse_dirstate_only(dirstate_node)?;
452 454 has_dirstate_node_or_is_ignored = true;
453 455 }
// The name only exists on the filesystem: `traverse_fs_only` tells
// whether it is ignored.
454 456 Right(fs_entry) => {
455 457 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
456 458 has_ignored_ancestor.force(&self.ignore_fn),
457 459 directory_hg_path,
458 460 fs_entry,
459 461 )
460 462 }
461 463 }
462 464 Ok(has_dirstate_node_or_is_ignored)
463 465 })
// The result is `true` only if it is `true` for every child.
464 466 .try_reduce(|| true, |a, b| Ok(a && b))
465 467 }
466 468
/// A path exists both as a node in the dirstate tree and on the filesystem
///
/// * If the filesystem entry is a directory, its children are traversed and
///   the directory mtime is possibly queued for caching.
/// * Otherwise, if it is a file or symlink accepted by the matcher, the
///   dirstate entry decides which outcome is pushed. The child nodes of the
///   dirstate node, which cannot exist on the filesystem, are handed to
///   `traverse_dirstate_only`.
467 469 fn traverse_fs_and_dirstate<'ancestor>(
468 470 &self,
469 471 fs_path: &Path,
470 472 fs_metadata: &std::fs::Metadata,
471 473 dirstate_node: NodeRef<'tree, 'on_disk>,
472 474 has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>,
473 475 ) -> Result<(), DirstateV2ParseError> {
474 476 self.check_for_outdated_directory_cache(&dirstate_node)?;
475 477 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
476 478 let file_type = fs_metadata.file_type();
477 479 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
478 480 if !file_or_symlink {
479 481 // If we previously had a file here, it was removed (with
480 482 // `hg rm` or similar) or deleted before it could be
481 483 // replaced by a directory or something else.
482 484 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
483 485 }
484 486 if file_type.is_dir() {
485 487 if self.options.collect_traversed_dirs {
486 488 self.outcome
487 489 .lock()
488 490 .unwrap()
489 491 .traversed
490 492 .push(hg_path.detach_from_tree())
491 493 }
// Lazily computed "is this directory, or one of its ancestors, ignored",
// handed to the traversal of the children.
492 494 let is_ignored = HasIgnoredAncestor::create(
493 495 Some(&has_ignored_ancestor),
494 496 hg_path,
495 497 );
496 498 let is_at_repo_root = false;
497 499 let children_all_have_dirstate_node_or_are_ignored = self
498 500 .traverse_fs_directory_and_dirstate(
499 501 &is_ignored,
500 502 dirstate_node.children(self.dmap.on_disk)?,
501 503 hg_path,
502 504 fs_path,
503 505 Some(fs_metadata),
504 506 dirstate_node.cached_directory_mtime()?,
505 507 is_at_repo_root,
506 508 )?;
507 509 self.maybe_save_directory_mtime(
508 510 children_all_have_dirstate_node_or_are_ignored,
509 511 fs_metadata,
510 512 dirstate_node,
511 513 )?
512 514 } else {
513 515 if file_or_symlink && self.matcher.matches(&hg_path) {
514 516 if let Some(entry) = dirstate_node.entry()? {
515 517 if !entry.any_tracked() {
516 518 // Forward-compat if we start tracking unknown/ignored
517 519 // files for caching reasons
518 520 self.mark_unknown_or_ignored(
519 521 has_ignored_ancestor.force(&self.ignore_fn),
520 522 &hg_path,
521 523 );
522 524 }
523 525 if entry.added() {
524 526 self.push_outcome(Outcome::Added, &dirstate_node)?;
525 527 } else if entry.removed() {
526 528 self.push_outcome(Outcome::Removed, &dirstate_node)?;
527 529 } else if entry.modified() {
528 530 self.push_outcome(Outcome::Modified, &dirstate_node)?;
529 531 } else {
530 532 self.handle_normal_file(&dirstate_node, fs_metadata)?;
531 533 }
532 534 } else {
533 535 // `node.entry.is_none()` indicates a "directory"
534 536 // node, but the filesystem has a file
535 537 self.mark_unknown_or_ignored(
536 538 has_ignored_ancestor.force(&self.ignore_fn),
537 539 hg_path,
538 540 );
539 541 }
540 542 }
541 543
// The filesystem entry is not a directory, so none of the child nodes can
// exist on the filesystem.
542 544 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
543 545 {
544 546 self.traverse_dirstate_only(child_node)?
545 547 }
546 548 }
547 549 Ok(())
548 550 }
549 551
/// Queues the mtime of a directory for caching in the dirstate, if it is
/// safe to do so.
///
/// Nothing is queued when some child is neither in the dirstate nor
/// ignored, when the time at the start of `status()` is unknown, when the
/// directory mtime is unavailable or too recent to be reliable, or when the
/// dirstate node already caches a likely-equal mtime.
550 552 fn maybe_save_directory_mtime(
551 553 &self,
552 554 children_all_have_dirstate_node_or_are_ignored: bool,
553 555 directory_metadata: &std::fs::Metadata,
554 556 dirstate_node: NodeRef<'tree, 'on_disk>,
555 557 ) -> Result<(), DirstateV2ParseError> {
556 558 if !children_all_have_dirstate_node_or_are_ignored {
557 559 return Ok(());
558 560 }
559 561 // All filesystem directory entries from `read_dir` have a
560 562 // corresponding node in the dirstate, so we can reconstitute the
561 563 // names of those entries without calling `read_dir` again.
562 564
563 565 // TODO: use let-else here and below when available:
564 566 // https://github.com/rust-lang/rust/issues/87335
565 567 let status_start = if let Some(status_start) =
566 568 &self.filesystem_time_at_status_start
567 569 {
568 570 status_start
569 571 } else {
570 572 return Ok(());
571 573 };
572 574
573 575 // Although the Rust standard library’s `SystemTime` type
574 576 // has nanosecond precision, the times reported for a
575 577 // directory’s (or file’s) modified time may have lower
576 578 // resolution based on the filesystem (for example ext3
577 579 // only stores integer seconds), kernel (see
578 580 // https://stackoverflow.com/a/14393315/1162888), etc.
579 581 let directory_mtime = if let Ok(option) =
580 582 TruncatedTimestamp::for_reliable_mtime_of(
581 583 directory_metadata,
582 584 status_start,
583 585 ) {
584 586 if let Some(directory_mtime) = option {
585 587 directory_mtime
586 588 } else {
587 589 // The directory was modified too recently,
588 590 // don’t cache its `read_dir` results.
589 591 //
590 592 // 1. A change to this directory (direct child was
591 593 // added or removed) causes its mtime to be set
592 594 // (possibly truncated) to `directory_mtime`
593 595 // 2. This `status` algorithm calls `read_dir`
594 596 // 3. Another change is made to the same directory
595 597 // so that calling `read_dir` again would give
596 598 // different results, but soon enough after 1. that
597 599 // the mtime stays the same
598 600 //
599 601 // On a system where the time resolution is poor, this
600 602 // scenario is not unlikely if all three steps are caused
601 603 // by the same script.
602 604 return Ok(());
603 605 }
604 606 } else {
605 607 // OS/libc does not support mtime?
606 608 return Ok(());
607 609 };
608 610 // We’ve observed (through `status_start`) that time has
609 611 // “progressed” since `directory_mtime`, so any further
610 612 // change to this directory is extremely likely to cause a
611 613 // different mtime.
612 614 //
613 615 // Having the same mtime again is not entirely impossible
614 616 // since the system clock is not monotonic. It could jump
615 617 // backward to some point before `directory_mtime`, then a
616 618 // directory change could potentially happen during exactly
617 619 // the wrong tick.
618 620 //
619 621 // We deem this scenario (unlike the previous one) to be
620 622 // unlikely enough in practice.
621 623
// Only queue an update when the dirstate does not already cache a
// likely-equal mtime for this directory.
622 624 let is_up_to_date =
623 625 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
624 626 cached.likely_equal(directory_mtime)
625 627 } else {
626 628 false
627 629 };
628 630 if !is_up_to_date {
629 631 let hg_path = dirstate_node
630 632 .full_path_borrowed(self.dmap.on_disk)?
631 633 .detach_from_tree();
632 self.new_cachable_directories
634 self.new_cacheable_directories
633 635 .lock()
634 636 .unwrap()
635 637 .push((hg_path, directory_mtime))
636 638 }
637 639 Ok(())
638 640 }
639 641
640 642 /// A file that is clean in the dirstate was found in the filesystem
///
/// Pushes the node as `Unsure`, `Modified` or (only with
/// `options.list_clean`) `Clean`, based on the size, mode, copy source,
/// other-parent and mtime checks below.
///
/// Panics if the node has no entry, or if the filesystem mtime cannot be
/// obtained while the entry has one.
641 643 fn handle_normal_file(
642 644 &self,
643 645 dirstate_node: &NodeRef<'tree, 'on_disk>,
644 646 fs_metadata: &std::fs::Metadata,
645 647 ) -> Result<(), DirstateV2ParseError> {
646 648 // Keep the low 31 bits
647 649 fn truncate_u64(value: u64) -> i32 {
648 650 (value & 0x7FFF_FFFF) as i32
649 651 }
650 652
651 653 let entry = dirstate_node
652 654 .entry()?
653 655 .expect("handle_normal_file called with entry-less node");
// A closure, so that the mode is only compared when the other conditions
// below have not already decided the outcome.
654 656 let mode_changed =
655 657 || self.options.check_exec && entry.mode_changed(fs_metadata);
656 658 let size = entry.size();
657 659 let size_changed = size != truncate_u64(fs_metadata.len());
// NOTE(review): `size >= 0` presumably excludes negative sentinel sizes
// stored in the entry; confirm against `entry.size()`.
658 660 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
659 661 // issue6456: Size returned may be longer due to encryption
660 662 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
661 663 self.push_outcome(Outcome::Unsure, dirstate_node)?
662 664 } else if dirstate_node.has_copy_source()
663 665 || entry.is_from_other_parent()
664 666 || (size >= 0 && (size_changed || mode_changed()))
665 667 {
666 668 self.push_outcome(Outcome::Modified, dirstate_node)?
667 669 } else {
668 670 let mtime_looks_clean;
669 671 if let Some(dirstate_mtime) = entry.truncated_mtime() {
670 672 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
671 673 .expect("OS/libc does not support mtime?");
672 674 // There might be a change in the future if for example the
673 675 // internal clock becomes off while the process runs, but this
674 676 // is a case where the issues the user would face
675 677 // would be a lot worse and there is nothing we
676 678 // can really do.
677 679 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
678 680 } else {
679 681 // No mtime in the dirstate entry
680 682 mtime_looks_clean = false
681 683 };
682 684 if !mtime_looks_clean {
683 685 self.push_outcome(Outcome::Unsure, dirstate_node)?
684 686 } else if self.options.list_clean {
685 687 self.push_outcome(Outcome::Clean, dirstate_node)?
686 688 }
687 689 }
688 690 Ok(())
689 691 }
690 692
691 693 /// A node in the dirstate tree has no corresponding filesystem entry
///
/// The node is checked for an outdated directory cache and reported as
/// removed or deleted if it is a tracked file, then all its children are
/// processed the same way, recursively and in parallel.
692 694 fn traverse_dirstate_only(
693 695 &self,
694 696 dirstate_node: NodeRef<'tree, 'on_disk>,
695 697 ) -> Result<(), DirstateV2ParseError> {
696 698 self.check_for_outdated_directory_cache(&dirstate_node)?;
697 699 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
698 700 dirstate_node
699 701 .children(self.dmap.on_disk)?
700 702 .par_iter()
701 703 .map(|child_node| self.traverse_dirstate_only(child_node))
702 704 .collect()
703 705 }
704 706
705 707 /// A node in the dirstate tree has no corresponding *file* on the
706 708 /// filesystem
707 709 ///
708 710 /// Does nothing on a "directory" node
///
/// For a tracked entry whose path is accepted by the matcher, pushes
/// `Removed` if the entry is marked as removed, and `Deleted` otherwise.
709 711 fn mark_removed_or_deleted_if_file(
710 712 &self,
711 713 dirstate_node: &NodeRef<'tree, 'on_disk>,
712 714 ) -> Result<(), DirstateV2ParseError> {
713 715 if let Some(entry) = dirstate_node.entry()? {
714 716 if !entry.any_tracked() {
715 717 // Future-compat for when we start storing ignored and unknown
716 718 // files for caching reasons
717 719 return Ok(());
718 720 }
719 721 let path = dirstate_node.full_path(self.dmap.on_disk)?;
720 722 if self.matcher.matches(path) {
721 723 if entry.removed() {
722 724 self.push_outcome(Outcome::Removed, dirstate_node)?
723 725 } else {
724 726 self.push_outcome(Outcome::Deleted, &dirstate_node)?
725 727 }
726 728 }
727 729 }
728 730 Ok(())
729 731 }
730 732
731 733 /// Something in the filesystem has no corresponding dirstate node
732 734 ///
733 735 /// Returns whether that path is ignored
///
/// Directories are listed and traversed recursively (in parallel) only when
/// the listing options require it. Files and symlinks accepted by the
/// matcher are reported as unknown or ignored.
734 736 fn traverse_fs_only(
735 737 &self,
736 738 has_ignored_ancestor: bool,
737 739 directory_hg_path: &HgPath,
738 740 fs_entry: &DirEntry,
739 741 ) -> bool {
740 742 let hg_path = directory_hg_path.join(&fs_entry.base_name);
741 743 let file_type = fs_entry.metadata.file_type();
742 744 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
743 745 if file_type.is_dir() {
744 746 let is_ignored =
745 747 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
746 748 let traverse_children = if is_ignored {
747 749 // Descendants of an ignored directory are all ignored
748 750 self.options.list_ignored
749 751 } else {
750 752 // Descendants of an unknown directory may be either unknown or
751 753 // ignored
752 754 self.options.list_unknown || self.options.list_ignored
753 755 };
754 756 if traverse_children {
755 757 let is_at_repo_root = false;
// An unreadable directory has already been reported by `self.read_dir`;
// its children are simply not traversed.
756 758 if let Ok(children_fs_entries) = self.read_dir(
757 759 &hg_path,
758 760 &fs_entry.full_path,
759 761 is_at_repo_root,
760 762 ) {
761 763 children_fs_entries.par_iter().for_each(|child_fs_entry| {
762 764 self.traverse_fs_only(
763 765 is_ignored,
764 766 &hg_path,
765 767 child_fs_entry,
766 768 );
767 769 })
768 770 }
769 771 if self.options.collect_traversed_dirs {
770 772 self.outcome.lock().unwrap().traversed.push(hg_path.into())
771 773 }
772 774 }
773 775 is_ignored
774 776 } else {
775 777 if file_or_symlink {
776 778 if self.matcher.matches(&hg_path) {
777 779 self.mark_unknown_or_ignored(
778 780 has_ignored_ancestor,
779 781 &BorrowedPath::InMemory(&hg_path),
780 782 )
781 783 } else {
782 784 // We haven’t computed whether this path is ignored. It
783 785 // might not be, and a future run of status might have a
784 786 // different matcher that matches it. So treat it as not
785 787 // ignored. That is, inhibit readdir caching of the parent
786 788 // directory.
787 789 false
788 790 }
789 791 } else {
790 792 // This is neither a directory, a plain file, nor a symlink.
791 793 // Treat it like an ignored file.
792 794 true
793 795 }
794 796 }
795 797 }
796 798
/// Reports `hg_path` as ignored or as unknown, provided the corresponding
/// listing option (`list_ignored` or `list_unknown`) is enabled.
///
/// The path is ignored if `has_ignored_ancestor` is true or if `ignore_fn`
/// says so.
///
797 799 /// Returns whether that path is ignored
798 800 fn mark_unknown_or_ignored(
799 801 &self,
800 802 has_ignored_ancestor: bool,
801 803 hg_path: &BorrowedPath<'_, 'on_disk>,
802 804 ) -> bool {
803 805 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
804 806 if is_ignored {
805 807 if self.options.list_ignored {
806 808 self.push_outcome_without_copy_source(
807 809 Outcome::Ignored,
808 810 hg_path,
809 811 )
810 812 }
811 813 } else {
812 814 if self.options.list_unknown {
813 815 self.push_outcome_without_copy_source(
814 816 Outcome::Unknown,
815 817 hg_path,
816 818 )
817 819 }
818 820 }
819 821 is_ignored
820 822 }
821 823 }
822 824
/// An entry of a filesystem directory together with its metadata, as
/// collected by `DirEntry::read_dir`.
823 825 struct DirEntry {
/// File name of the entry, as bytes.
824 826 base_name: HgPathBuf,
/// Path of the entry: `entry.path()`, or the bare file name when the listed
/// directory is the empty path.
825 827 full_path: PathBuf,
/// Metadata obtained from `entry.metadata()`.
826 828 metadata: std::fs::Metadata,
827 829 }
828 830
829 831 impl DirEntry {
830 832 /// Returns **unsorted** entries in the given directory, with name and
831 833 /// metadata.
832 834 ///
833 835 /// If a `.hg` sub-directory is encountered:
834 836 ///
835 837 /// * At the repository root, ignore that sub-directory
836 838 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
837 839 /// list instead.
///
/// Entries whose metadata lookup fails with "not found" are skipped; any
/// other I/O error is returned.
838 840 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
839 841 // `read_dir` returns a "not found" error for the empty path
840 842 let at_cwd = path == Path::new("");
841 843 let read_dir_path = if at_cwd { Path::new(".") } else { path };
842 844 let mut results = Vec::new();
843 845 for entry in read_dir_path.read_dir()? {
844 846 let entry = entry?;
845 847 let metadata = match entry.metadata() {
846 848 Ok(v) => v,
847 849 Err(e) => {
848 850 // race with file deletion?
849 851 if e.kind() == std::io::ErrorKind::NotFound {
850 852 continue;
851 853 } else {
852 854 return Err(e);
853 855 }
854 856 }
855 857 };
856 858 let file_name = entry.file_name();
857 859 // FIXME don't do this when cached
858 860 if file_name == ".hg" {
859 861 if is_at_repo_root {
860 862 // Skip the repo’s own .hg (might be a symlink)
861 863 continue;
862 864 } else if metadata.is_dir() {
863 865 // A .hg sub-directory at another location means a subrepo,
864 866 // skip it entirely.
865 867 return Ok(Vec::new());
866 868 }
867 869 }
// When listing the empty path, keep paths relative by using the bare file
// name instead of `entry.path()`.
868 870 let full_path = if at_cwd {
869 871 file_name.clone().into()
870 872 } else {
871 873 entry.path()
872 874 };
873 875 let base_name = get_bytes_from_os_string(file_name).into();
874 876 results.push(DirEntry {
875 877 base_name,
876 878 full_path,
877 879 metadata,
878 880 })
879 881 }
880 882 Ok(results)
881 883 }
882 884 }
883 885
884 886 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
885 887 /// of the given repository.
886 888 ///
887 889 /// This is similar to `SystemTime::now()`, with the result truncated to the
888 890 /// same time resolution as other files’ modification times. Using `.hg`
889 891 /// instead of the system’s default temporary directory (such as `/tmp`) makes
890 892 /// it more likely the temporary file is in the same disk partition as contents
891 893 /// of the working directory, which can matter since different filesystems may
892 894 /// store timestamps with different resolutions.
893 895 ///
894 896 /// This may fail, typically if we lack write permissions. In that case we
895 897 /// should continue the `status()` algorithm anyway and consider the current
896 898 /// date/time to be unknown.
897 899 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
898 900 tempfile::tempfile_in(repo_root.join(".hg"))?
899 901 .metadata()?
900 902 .modified()
901 903 }
General Comments 0
You need to be logged in to leave comments. Login now