##// END OF EJS Templates
status: fix hg status race against file deletion...
Arseniy Alekseyev -
r50030:dcec16e7 6.0.3 stable
parent child Browse files
Show More
@@ -1,756 +1,766 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate_tree::dirstate_map::BorrowedPath;
4 4 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
5 5 use crate::dirstate_tree::dirstate_map::DirstateMap;
6 6 use crate::dirstate_tree::dirstate_map::NodeData;
7 7 use crate::dirstate_tree::dirstate_map::NodeRef;
8 8 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 9 use crate::matchers::get_ignore_function;
10 10 use crate::matchers::Matcher;
11 11 use crate::utils::files::get_bytes_from_os_string;
12 12 use crate::utils::files::get_path_from_bytes;
13 13 use crate::utils::hg_path::HgPath;
14 14 use crate::BadMatch;
15 15 use crate::DirstateStatus;
16 16 use crate::EntryState;
17 17 use crate::HgPathBuf;
18 18 use crate::PatternFileWarning;
19 19 use crate::StatusError;
20 20 use crate::StatusOptions;
21 21 use micro_timer::timed;
22 22 use rayon::prelude::*;
23 23 use sha1::{Digest, Sha1};
24 24 use std::borrow::Cow;
25 25 use std::io;
26 26 use std::path::Path;
27 27 use std::path::PathBuf;
28 28 use std::sync::Mutex;
29 29 use std::time::SystemTime;
30 30
31 31 /// Returns the status of the working directory compared to its parent
32 32 /// changeset.
33 33 ///
34 34 /// This algorithm is based on traversing the filesystem tree (`fs` in function
35 35 /// and variable names) and dirstate tree at the same time. The core of this
36 36 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
37 37 /// and its use of `itertools::merge_join_by`. When reaching a path that only
38 38 /// exists in one of the two trees, depending on information requested by
39 39 /// `options` we may need to traverse the remaining subtree.
40 40 #[timed]
41 41 pub fn status<'tree, 'on_disk: 'tree>(
42 42 dmap: &'tree mut DirstateMap<'on_disk>,
43 43 matcher: &(dyn Matcher + Sync),
44 44 root_dir: PathBuf,
45 45 ignore_files: Vec<PathBuf>,
46 46 options: StatusOptions,
47 47 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
48 48 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
49 49 if options.list_ignored || options.list_unknown {
50 50 let mut hasher = Sha1::new();
51 51 let (ignore_fn, warnings) = get_ignore_function(
52 52 ignore_files,
53 53 &root_dir,
54 54 &mut |pattern_bytes| hasher.update(pattern_bytes),
55 55 )?;
56 56 let new_hash = *hasher.finalize().as_ref();
57 57 let changed = new_hash != dmap.ignore_patterns_hash;
58 58 dmap.ignore_patterns_hash = new_hash;
59 59 (ignore_fn, warnings, Some(changed))
60 60 } else {
61 61 (Box::new(|&_| true), vec![], None)
62 62 };
63 63
64 64 let common = StatusCommon {
65 65 dmap,
66 66 options,
67 67 matcher,
68 68 ignore_fn,
69 69 outcome: Default::default(),
70 70 ignore_patterns_have_changed: patterns_changed,
71 71 new_cachable_directories: Default::default(),
72 72 outated_cached_directories: Default::default(),
73 73 filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
74 74 };
75 75 let is_at_repo_root = true;
76 76 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
77 77 let has_ignored_ancestor = false;
78 78 let root_cached_mtime = None;
79 79 let root_dir_metadata = None;
80 80 // If the path we have for the repository root is a symlink, do follow it.
81 81 // (As opposed to symlinks within the working directory which are not
82 82 // followed, using `std::fs::symlink_metadata`.)
83 83 common.traverse_fs_directory_and_dirstate(
84 84 has_ignored_ancestor,
85 85 dmap.root.as_ref(),
86 86 hg_path,
87 87 &root_dir,
88 88 root_dir_metadata,
89 89 root_cached_mtime,
90 90 is_at_repo_root,
91 91 )?;
92 92 let mut outcome = common.outcome.into_inner().unwrap();
93 93 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
94 94 let outdated = common.outated_cached_directories.into_inner().unwrap();
95 95
96 96 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
97 97 || !outdated.is_empty()
98 98 || !new_cachable.is_empty();
99 99
100 100 // Remove outdated mtimes before adding new mtimes, in case a given
101 101 // directory is both
102 102 for path in &outdated {
103 103 let node = dmap.get_or_insert(path)?;
104 104 if let NodeData::CachedDirectory { .. } = &node.data {
105 105 node.data = NodeData::None
106 106 }
107 107 }
108 108 for (path, mtime) in &new_cachable {
109 109 let node = dmap.get_or_insert(path)?;
110 110 match &node.data {
111 111 NodeData::Entry(_) => {} // Don’t overwrite an entry
112 112 NodeData::CachedDirectory { .. } | NodeData::None => {
113 113 node.data = NodeData::CachedDirectory { mtime: *mtime }
114 114 }
115 115 }
116 116 }
117 117
118 118 Ok((outcome, warnings))
119 119 }
120 120
121 121 /// Bag of random things needed by various parts of the algorithm. Reduces the
122 122 /// number of parameters passed to functions.
123 123 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
124 124 dmap: &'tree DirstateMap<'on_disk>,
125 125 options: StatusOptions,
126 126 matcher: &'a (dyn Matcher + Sync),
127 127 ignore_fn: IgnoreFnType<'a>,
128 128 outcome: Mutex<DirstateStatus<'on_disk>>,
129 129 new_cachable_directories:
130 130 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
131 131 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
132 132
133 133 /// Whether ignore files like `.hgignore` have changed since the previous
134 134 /// time a `status()` call wrote their hash to the dirstate. `None` means
135 135 /// we don’t know as this run doesn’t list either ignored or uknown files
136 136 /// and therefore isn’t reading `.hgignore`.
137 137 ignore_patterns_have_changed: Option<bool>,
138 138
139 139 /// The current time at the start of the `status()` algorithm, as measured
140 140 /// and possibly truncated by the filesystem.
141 141 filesystem_time_at_status_start: Option<SystemTime>,
142 142 }
143 143
144 144 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
145 145 fn read_dir(
146 146 &self,
147 147 hg_path: &HgPath,
148 148 fs_path: &Path,
149 149 is_at_repo_root: bool,
150 150 ) -> Result<Vec<DirEntry>, ()> {
151 151 DirEntry::read_dir(fs_path, is_at_repo_root)
152 152 .map_err(|error| self.io_error(error, hg_path))
153 153 }
154 154
155 155 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
156 156 let errno = error.raw_os_error().expect("expected real OS error");
157 157 self.outcome
158 158 .lock()
159 159 .unwrap()
160 160 .bad
161 161 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
162 162 }
163 163
164 164 fn check_for_outdated_directory_cache(
165 165 &self,
166 166 dirstate_node: &NodeRef<'tree, 'on_disk>,
167 167 ) -> Result<(), DirstateV2ParseError> {
168 168 if self.ignore_patterns_have_changed == Some(true)
169 169 && dirstate_node.cached_directory_mtime()?.is_some()
170 170 {
171 171 self.outated_cached_directories.lock().unwrap().push(
172 172 dirstate_node
173 173 .full_path_borrowed(self.dmap.on_disk)?
174 174 .detach_from_tree(),
175 175 )
176 176 }
177 177 Ok(())
178 178 }
179 179
180 180 /// If this returns true, we can get accurate results by only using
181 181 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
182 182 /// need to call `read_dir`.
183 183 fn can_skip_fs_readdir(
184 184 &self,
185 185 directory_metadata: Option<&std::fs::Metadata>,
186 186 cached_directory_mtime: Option<TruncatedTimestamp>,
187 187 ) -> bool {
188 188 if !self.options.list_unknown && !self.options.list_ignored {
189 189 // All states that we care about listing have corresponding
190 190 // dirstate entries.
191 191 // This happens for example with `hg status -mard`.
192 192 return true;
193 193 }
194 194 if !self.options.list_ignored
195 195 && self.ignore_patterns_have_changed == Some(false)
196 196 {
197 197 if let Some(cached_mtime) = cached_directory_mtime {
198 198 // The dirstate contains a cached mtime for this directory, set
199 199 // by a previous run of the `status` algorithm which found this
200 200 // directory eligible for `read_dir` caching.
201 201 if let Some(meta) = directory_metadata {
202 202 if cached_mtime
203 203 .likely_equal_to_mtime_of(meta)
204 204 .unwrap_or(false)
205 205 {
206 206 // The mtime of that directory has not changed
207 207 // since then, which means that the results of
208 208 // `read_dir` should also be unchanged.
209 209 return true;
210 210 }
211 211 }
212 212 }
213 213 }
214 214 false
215 215 }
216 216
217 217 /// Returns whether all child entries of the filesystem directory have a
218 218 /// corresponding dirstate node or are ignored.
219 219 fn traverse_fs_directory_and_dirstate(
220 220 &self,
221 221 has_ignored_ancestor: bool,
222 222 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
223 223 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
224 224 directory_fs_path: &Path,
225 225 directory_metadata: Option<&std::fs::Metadata>,
226 226 cached_directory_mtime: Option<TruncatedTimestamp>,
227 227 is_at_repo_root: bool,
228 228 ) -> Result<bool, DirstateV2ParseError> {
229 229 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
230 230 {
231 231 dirstate_nodes
232 232 .par_iter()
233 233 .map(|dirstate_node| {
234 234 let fs_path = directory_fs_path.join(get_path_from_bytes(
235 235 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
236 236 ));
237 237 match std::fs::symlink_metadata(&fs_path) {
238 238 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
239 239 &fs_path,
240 240 &fs_metadata,
241 241 dirstate_node,
242 242 has_ignored_ancestor,
243 243 ),
244 244 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
245 245 self.traverse_dirstate_only(dirstate_node)
246 246 }
247 247 Err(error) => {
248 248 let hg_path =
249 249 dirstate_node.full_path(self.dmap.on_disk)?;
250 250 Ok(self.io_error(error, hg_path))
251 251 }
252 252 }
253 253 })
254 254 .collect::<Result<_, _>>()?;
255 255
256 256 // We don’t know, so conservatively say this isn’t the case
257 257 let children_all_have_dirstate_node_or_are_ignored = false;
258 258
259 259 return Ok(children_all_have_dirstate_node_or_are_ignored);
260 260 }
261 261
262 262 let mut fs_entries = if let Ok(entries) = self.read_dir(
263 263 directory_hg_path,
264 264 directory_fs_path,
265 265 is_at_repo_root,
266 266 ) {
267 267 entries
268 268 } else {
269 269 // Treat an unreadable directory (typically because of insufficient
270 270 // permissions) like an empty directory. `self.read_dir` has
271 271 // already called `self.io_error` so a warning will be emitted.
272 272 Vec::new()
273 273 };
274 274
275 275 // `merge_join_by` requires both its input iterators to be sorted:
276 276
277 277 let dirstate_nodes = dirstate_nodes.sorted();
278 278 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
279 279 // https://github.com/rust-lang/rust/issues/34162
280 280 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
281 281
282 282 // Propagate here any error that would happen inside the comparison
283 283 // callback below
284 284 for dirstate_node in &dirstate_nodes {
285 285 dirstate_node.base_name(self.dmap.on_disk)?;
286 286 }
287 287 itertools::merge_join_by(
288 288 dirstate_nodes,
289 289 &fs_entries,
290 290 |dirstate_node, fs_entry| {
291 291 // This `unwrap` never panics because we already propagated
292 292 // those errors above
293 293 dirstate_node
294 294 .base_name(self.dmap.on_disk)
295 295 .unwrap()
296 296 .cmp(&fs_entry.base_name)
297 297 },
298 298 )
299 299 .par_bridge()
300 300 .map(|pair| {
301 301 use itertools::EitherOrBoth::*;
302 302 let has_dirstate_node_or_is_ignored;
303 303 match pair {
304 304 Both(dirstate_node, fs_entry) => {
305 305 self.traverse_fs_and_dirstate(
306 306 &fs_entry.full_path,
307 307 &fs_entry.metadata,
308 308 dirstate_node,
309 309 has_ignored_ancestor,
310 310 )?;
311 311 has_dirstate_node_or_is_ignored = true
312 312 }
313 313 Left(dirstate_node) => {
314 314 self.traverse_dirstate_only(dirstate_node)?;
315 315 has_dirstate_node_or_is_ignored = true;
316 316 }
317 317 Right(fs_entry) => {
318 318 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
319 319 has_ignored_ancestor,
320 320 directory_hg_path,
321 321 fs_entry,
322 322 )
323 323 }
324 324 }
325 325 Ok(has_dirstate_node_or_is_ignored)
326 326 })
327 327 .try_reduce(|| true, |a, b| Ok(a && b))
328 328 }
329 329
330 330 fn traverse_fs_and_dirstate(
331 331 &self,
332 332 fs_path: &Path,
333 333 fs_metadata: &std::fs::Metadata,
334 334 dirstate_node: NodeRef<'tree, 'on_disk>,
335 335 has_ignored_ancestor: bool,
336 336 ) -> Result<(), DirstateV2ParseError> {
337 337 self.check_for_outdated_directory_cache(&dirstate_node)?;
338 338 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
339 339 let file_type = fs_metadata.file_type();
340 340 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
341 341 if !file_or_symlink {
342 342 // If we previously had a file here, it was removed (with
343 343 // `hg rm` or similar) or deleted before it could be
344 344 // replaced by a directory or something else.
345 345 self.mark_removed_or_deleted_if_file(
346 346 &hg_path,
347 347 dirstate_node.state()?,
348 348 );
349 349 }
350 350 if file_type.is_dir() {
351 351 if self.options.collect_traversed_dirs {
352 352 self.outcome
353 353 .lock()
354 354 .unwrap()
355 355 .traversed
356 356 .push(hg_path.detach_from_tree())
357 357 }
358 358 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
359 359 let is_at_repo_root = false;
360 360 let children_all_have_dirstate_node_or_are_ignored = self
361 361 .traverse_fs_directory_and_dirstate(
362 362 is_ignored,
363 363 dirstate_node.children(self.dmap.on_disk)?,
364 364 hg_path,
365 365 fs_path,
366 366 Some(fs_metadata),
367 367 dirstate_node.cached_directory_mtime()?,
368 368 is_at_repo_root,
369 369 )?;
370 370 self.maybe_save_directory_mtime(
371 371 children_all_have_dirstate_node_or_are_ignored,
372 372 fs_metadata,
373 373 dirstate_node,
374 374 )?
375 375 } else {
376 376 if file_or_symlink && self.matcher.matches(hg_path) {
377 377 if let Some(state) = dirstate_node.state()? {
378 378 match state {
379 379 EntryState::Added => self
380 380 .outcome
381 381 .lock()
382 382 .unwrap()
383 383 .added
384 384 .push(hg_path.detach_from_tree()),
385 385 EntryState::Removed => self
386 386 .outcome
387 387 .lock()
388 388 .unwrap()
389 389 .removed
390 390 .push(hg_path.detach_from_tree()),
391 391 EntryState::Merged => self
392 392 .outcome
393 393 .lock()
394 394 .unwrap()
395 395 .modified
396 396 .push(hg_path.detach_from_tree()),
397 397 EntryState::Normal => self
398 398 .handle_normal_file(&dirstate_node, fs_metadata)?,
399 399 }
400 400 } else {
401 401 // `node.entry.is_none()` indicates a "directory"
402 402 // node, but the filesystem has a file
403 403 self.mark_unknown_or_ignored(
404 404 has_ignored_ancestor,
405 405 hg_path,
406 406 );
407 407 }
408 408 }
409 409
410 410 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
411 411 {
412 412 self.traverse_dirstate_only(child_node)?
413 413 }
414 414 }
415 415 Ok(())
416 416 }
417 417
418 418 fn maybe_save_directory_mtime(
419 419 &self,
420 420 children_all_have_dirstate_node_or_are_ignored: bool,
421 421 directory_metadata: &std::fs::Metadata,
422 422 dirstate_node: NodeRef<'tree, 'on_disk>,
423 423 ) -> Result<(), DirstateV2ParseError> {
424 424 if children_all_have_dirstate_node_or_are_ignored {
425 425 // All filesystem directory entries from `read_dir` have a
426 426 // corresponding node in the dirstate, so we can reconstitute the
427 427 // names of those entries without calling `read_dir` again.
428 428 if let (Some(status_start), Ok(directory_mtime)) = (
429 429 &self.filesystem_time_at_status_start,
430 430 directory_metadata.modified(),
431 431 ) {
432 432 // Although the Rust standard library’s `SystemTime` type
433 433 // has nanosecond precision, the times reported for a
434 434 // directory’s (or file’s) modified time may have lower
435 435 // resolution based on the filesystem (for example ext3
436 436 // only stores integer seconds), kernel (see
437 437 // https://stackoverflow.com/a/14393315/1162888), etc.
438 438 if &directory_mtime >= status_start {
439 439 // The directory was modified too recently, don’t cache its
440 440 // `read_dir` results.
441 441 //
442 442 // A timeline like this is possible:
443 443 //
444 444 // 1. A change to this directory (direct child was
445 445 // added or removed) cause its mtime to be set
446 446 // (possibly truncated) to `directory_mtime`
447 447 // 2. This `status` algorithm calls `read_dir`
448 448 // 3. An other change is made to the same directory is
449 449 // made so that calling `read_dir` agin would give
450 450 // different results, but soon enough after 1. that
451 451 // the mtime stays the same
452 452 //
453 453 // On a system where the time resolution poor, this
454 454 // scenario is not unlikely if all three steps are caused
455 455 // by the same script.
456 456 } else {
457 457 // We’ve observed (through `status_start`) that time has
458 458 // “progressed” since `directory_mtime`, so any further
459 459 // change to this directory is extremely likely to cause a
460 460 // different mtime.
461 461 //
462 462 // Having the same mtime again is not entirely impossible
463 463 // since the system clock is not monotonous. It could jump
464 464 // backward to some point before `directory_mtime`, then a
465 465 // directory change could potentially happen during exactly
466 466 // the wrong tick.
467 467 //
468 468 // We deem this scenario (unlike the previous one) to be
469 469 // unlikely enough in practice.
470 470 let truncated = TruncatedTimestamp::from(directory_mtime);
471 471 let is_up_to_date = if let Some(cached) =
472 472 dirstate_node.cached_directory_mtime()?
473 473 {
474 474 cached.likely_equal(truncated)
475 475 } else {
476 476 false
477 477 };
478 478 if !is_up_to_date {
479 479 let hg_path = dirstate_node
480 480 .full_path_borrowed(self.dmap.on_disk)?
481 481 .detach_from_tree();
482 482 self.new_cachable_directories
483 483 .lock()
484 484 .unwrap()
485 485 .push((hg_path, truncated))
486 486 }
487 487 }
488 488 }
489 489 }
490 490 Ok(())
491 491 }
492 492
493 493 /// A file with `EntryState::Normal` in the dirstate was found in the
494 494 /// filesystem
495 495 fn handle_normal_file(
496 496 &self,
497 497 dirstate_node: &NodeRef<'tree, 'on_disk>,
498 498 fs_metadata: &std::fs::Metadata,
499 499 ) -> Result<(), DirstateV2ParseError> {
500 500 // Keep the low 31 bits
501 501 fn truncate_u64(value: u64) -> i32 {
502 502 (value & 0x7FFF_FFFF) as i32
503 503 }
504 504
505 505 let entry = dirstate_node
506 506 .entry()?
507 507 .expect("handle_normal_file called with entry-less node");
508 508 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
509 509 let mode_changed =
510 510 || self.options.check_exec && entry.mode_changed(fs_metadata);
511 511 let size = entry.size();
512 512 let size_changed = size != truncate_u64(fs_metadata.len());
513 513 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
514 514 // issue6456: Size returned may be longer due to encryption
515 515 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
516 516 self.outcome
517 517 .lock()
518 518 .unwrap()
519 519 .unsure
520 520 .push(hg_path.detach_from_tree())
521 521 } else if dirstate_node.has_copy_source()
522 522 || entry.is_from_other_parent()
523 523 || (size >= 0 && (size_changed || mode_changed()))
524 524 {
525 525 self.outcome
526 526 .lock()
527 527 .unwrap()
528 528 .modified
529 529 .push(hg_path.detach_from_tree())
530 530 } else {
531 531 let mtime_looks_clean;
532 532 if let Some(dirstate_mtime) = entry.truncated_mtime() {
533 533 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
534 534 .expect("OS/libc does not support mtime?");
535 535 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
536 536 && !fs_mtime.likely_equal(self.options.last_normal_time)
537 537 } else {
538 538 // No mtime in the dirstate entry
539 539 mtime_looks_clean = false
540 540 };
541 541 if !mtime_looks_clean {
542 542 self.outcome
543 543 .lock()
544 544 .unwrap()
545 545 .unsure
546 546 .push(hg_path.detach_from_tree())
547 547 } else if self.options.list_clean {
548 548 self.outcome
549 549 .lock()
550 550 .unwrap()
551 551 .clean
552 552 .push(hg_path.detach_from_tree())
553 553 }
554 554 }
555 555 Ok(())
556 556 }
557 557
558 558 /// A node in the dirstate tree has no corresponding filesystem entry
559 559 fn traverse_dirstate_only(
560 560 &self,
561 561 dirstate_node: NodeRef<'tree, 'on_disk>,
562 562 ) -> Result<(), DirstateV2ParseError> {
563 563 self.check_for_outdated_directory_cache(&dirstate_node)?;
564 564 self.mark_removed_or_deleted_if_file(
565 565 &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
566 566 dirstate_node.state()?,
567 567 );
568 568 dirstate_node
569 569 .children(self.dmap.on_disk)?
570 570 .par_iter()
571 571 .map(|child_node| self.traverse_dirstate_only(child_node))
572 572 .collect()
573 573 }
574 574
575 575 /// A node in the dirstate tree has no corresponding *file* on the
576 576 /// filesystem
577 577 ///
578 578 /// Does nothing on a "directory" node
579 579 fn mark_removed_or_deleted_if_file(
580 580 &self,
581 581 hg_path: &BorrowedPath<'tree, 'on_disk>,
582 582 dirstate_node_state: Option<EntryState>,
583 583 ) {
584 584 if let Some(state) = dirstate_node_state {
585 585 if self.matcher.matches(hg_path) {
586 586 if let EntryState::Removed = state {
587 587 self.outcome
588 588 .lock()
589 589 .unwrap()
590 590 .removed
591 591 .push(hg_path.detach_from_tree())
592 592 } else {
593 593 self.outcome
594 594 .lock()
595 595 .unwrap()
596 596 .deleted
597 597 .push(hg_path.detach_from_tree())
598 598 }
599 599 }
600 600 }
601 601 }
602 602
603 603 /// Something in the filesystem has no corresponding dirstate node
604 604 ///
605 605 /// Returns whether that path is ignored
606 606 fn traverse_fs_only(
607 607 &self,
608 608 has_ignored_ancestor: bool,
609 609 directory_hg_path: &HgPath,
610 610 fs_entry: &DirEntry,
611 611 ) -> bool {
612 612 let hg_path = directory_hg_path.join(&fs_entry.base_name);
613 613 let file_type = fs_entry.metadata.file_type();
614 614 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
615 615 if file_type.is_dir() {
616 616 let is_ignored =
617 617 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
618 618 let traverse_children = if is_ignored {
619 619 // Descendants of an ignored directory are all ignored
620 620 self.options.list_ignored
621 621 } else {
622 622 // Descendants of an unknown directory may be either unknown or
623 623 // ignored
624 624 self.options.list_unknown || self.options.list_ignored
625 625 };
626 626 if traverse_children {
627 627 let is_at_repo_root = false;
628 628 if let Ok(children_fs_entries) = self.read_dir(
629 629 &hg_path,
630 630 &fs_entry.full_path,
631 631 is_at_repo_root,
632 632 ) {
633 633 children_fs_entries.par_iter().for_each(|child_fs_entry| {
634 634 self.traverse_fs_only(
635 635 is_ignored,
636 636 &hg_path,
637 637 child_fs_entry,
638 638 );
639 639 })
640 640 }
641 641 }
642 642 if self.options.collect_traversed_dirs {
643 643 self.outcome.lock().unwrap().traversed.push(hg_path.into())
644 644 }
645 645 is_ignored
646 646 } else {
647 647 if file_or_symlink {
648 648 if self.matcher.matches(&hg_path) {
649 649 self.mark_unknown_or_ignored(
650 650 has_ignored_ancestor,
651 651 &BorrowedPath::InMemory(&hg_path),
652 652 )
653 653 } else {
654 654 // We haven’t computed whether this path is ignored. It
655 655 // might not be, and a future run of status might have a
656 656 // different matcher that matches it. So treat it as not
657 657 // ignored. That is, inhibit readdir caching of the parent
658 658 // directory.
659 659 false
660 660 }
661 661 } else {
662 662 // This is neither a directory, a plain file, or a symlink.
663 663 // Treat it like an ignored file.
664 664 true
665 665 }
666 666 }
667 667 }
668 668
669 669 /// Returns whether that path is ignored
670 670 fn mark_unknown_or_ignored(
671 671 &self,
672 672 has_ignored_ancestor: bool,
673 673 hg_path: &BorrowedPath<'_, 'on_disk>,
674 674 ) -> bool {
675 675 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
676 676 if is_ignored {
677 677 if self.options.list_ignored {
678 678 self.outcome
679 679 .lock()
680 680 .unwrap()
681 681 .ignored
682 682 .push(hg_path.detach_from_tree())
683 683 }
684 684 } else {
685 685 if self.options.list_unknown {
686 686 self.outcome
687 687 .lock()
688 688 .unwrap()
689 689 .unknown
690 690 .push(hg_path.detach_from_tree())
691 691 }
692 692 }
693 693 is_ignored
694 694 }
695 695 }
696 696
697 697 struct DirEntry {
698 698 base_name: HgPathBuf,
699 699 full_path: PathBuf,
700 700 metadata: std::fs::Metadata,
701 701 }
702 702
703 703 impl DirEntry {
704 704 /// Returns **unsorted** entries in the given directory, with name and
705 705 /// metadata.
706 706 ///
707 707 /// If a `.hg` sub-directory is encountered:
708 708 ///
709 709 /// * At the repository root, ignore that sub-directory
710 710 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
711 711 /// list instead.
712 712 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
713 713 let mut results = Vec::new();
714 714 for entry in path.read_dir()? {
715 715 let entry = entry?;
716 let metadata = entry.metadata()?;
716 let metadata = match entry.metadata() {
717 Ok(v) => v,
718 Err(e) => {
719 // race with file deletion?
720 if e.kind() == std::io::ErrorKind::NotFound {
721 continue;
722 } else {
723 return Err(e);
724 }
725 }
726 };
717 727 let name = get_bytes_from_os_string(entry.file_name());
718 728 // FIXME don't do this when cached
719 729 if name == b".hg" {
720 730 if is_at_repo_root {
721 731 // Skip the repo’s own .hg (might be a symlink)
722 732 continue;
723 733 } else if metadata.is_dir() {
724 734 // A .hg sub-directory at another location means a subrepo,
725 735 // skip it entirely.
726 736 return Ok(Vec::new());
727 737 }
728 738 }
729 739 results.push(DirEntry {
730 740 base_name: name.into(),
731 741 full_path: entry.path(),
732 742 metadata,
733 743 })
734 744 }
735 745 Ok(results)
736 746 }
737 747 }
738 748
739 749 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
740 750 /// of the give repository.
741 751 ///
742 752 /// This is similar to `SystemTime::now()`, with the result truncated to the
743 753 /// same time resolution as other files’ modification times. Using `.hg`
744 754 /// instead of the system’s default temporary directory (such as `/tmp`) makes
745 755 /// it more likely the temporary file is in the same disk partition as contents
746 756 /// of the working directory, which can matter since different filesystems may
747 757 /// store timestamps with different resolutions.
748 758 ///
749 759 /// This may fail, typically if we lack write permissions. In that case we
750 760 /// should continue the `status()` algoritm anyway and consider the current
751 761 /// date/time to be unknown.
752 762 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
753 763 tempfile::tempfile_in(repo_root.join(".hg"))?
754 764 .metadata()?
755 765 .modified()
756 766 }
General Comments 0
You need to be logged in to leave comments. Login now