##// END OF EJS Templates
hg-core: define a `dirstate_status` `Operation`...
Raphaël Gomès -
r45673:98817e5d default
parent child Browse files
Show More
@@ -0,0 +1,76 b''
1 // dirstate_status.rs
2 //
3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 use crate::dirstate::status::{build_response, Dispatch, HgPathCow, Status};
9 use crate::matchers::Matcher;
10 use crate::operations::Operation;
11 use crate::{DirstateStatus, StatusError};
12
13 /// A tuple of the paths that need to be checked in the filelog because it's
14 /// ambiguous whether they've changed, and the rest of the already dispatched
15 /// files.
16 pub type LookupAndStatus<'a> = (Vec<HgPathCow<'a>>, DirstateStatus<'a>);
17
18 impl<'a, M: Matcher + Sync> Operation<LookupAndStatus<'a>> for Status<'a, M> {
19 type Error = StatusError;
20
21 fn run(&self) -> Result<LookupAndStatus<'a>, Self::Error> {
22 let (traversed_sender, traversed_receiver) =
23 crossbeam::channel::unbounded();
24
25 // Step 1: check the files explicitly mentioned by the user
26 let (work, mut results) = self.walk_explicit(traversed_sender.clone());
27
28 if !work.is_empty() {
29 // Hashmaps are quite a bit slower to build than vecs, so only
30 // build it if needed.
31 let old_results = results.iter().cloned().collect();
32
33 // Step 2: recursively check the working directory for changes if
34 // needed
35 for (dir, dispatch) in work {
36 match dispatch {
37 Dispatch::Directory { was_file } => {
38 if was_file {
39 results.push((dir.to_owned(), Dispatch::Removed));
40 }
41 if self.options.list_ignored
42 || self.options.list_unknown
43 && !self.dir_ignore(&dir)
44 {
45 self.traverse(
46 &dir,
47 &old_results,
48 &mut results,
49 traversed_sender.clone(),
50 )?;
51 }
52 }
53 _ => {
54 unreachable!("There can only be directories in `work`")
55 }
56 }
57 }
58 }
59
60 if !self.matcher.is_exact() {
61 if self.options.list_unknown {
62 self.handle_unknowns(&mut results)?;
63 } else {
64 // TODO this is incorrect, see issue6335
65 // This requires a fix in both Python and Rust that can happen
66 // with other pending changes to `status`.
67 self.extend_from_dmap(&mut results);
68 }
69 }
70
71 drop(traversed_sender);
72 let traversed = traversed_receiver.into_iter().collect();
73
74 Ok(build_response(results, traversed))
75 }
76 }
@@ -1,954 +1,907 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust implementation of dirstate.status (dirstate.py).
9 9 //! It is currently missing a lot of functionality compared to the Python one
10 10 //! and will only be triggered in narrow cases.
11 11
12 12 use crate::{
13 13 dirstate::SIZE_FROM_OTHER_PARENT,
14 14 filepatterns::PatternFileWarning,
15 15 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
16 operations::Operation,
16 17 utils::{
17 18 files::{find_dirs, HgMetadata},
18 19 hg_path::{
19 20 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
20 21 HgPathError,
21 22 },
22 23 path_auditor::PathAuditor,
23 24 },
24 25 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
25 26 PatternError,
26 27 };
27 28 use lazy_static::lazy_static;
28 29 use micro_timer::timed;
29 30 use rayon::prelude::*;
30 31 use std::{
31 32 borrow::Cow,
32 33 collections::HashSet,
33 34 fs::{read_dir, DirEntry},
34 35 io::ErrorKind,
35 36 ops::Deref,
36 37 path::{Path, PathBuf},
37 38 };
38 39
/// Wrong type of file from a `BadMatch`
/// Note: a lot of those don't exist on all platforms.
#[derive(Debug, Copy, Clone)]
pub enum BadType {
    CharacterDevice,
    BlockDevice,
    FIFO,
    Socket,
    Directory,
    Unknown,
}

/// Human-readable name of the file type.
///
/// Implementing `Display` (rather than `ToString` directly) keeps
/// `to_string()` available through the standard blanket impl and also makes
/// the type usable with `format!`/`write!`.
impl std::fmt::Display for BadType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            BadType::CharacterDevice => "character device",
            BadType::BlockDevice => "block device",
            BadType::FIFO => "fifo",
            BadType::Socket => "socket",
            BadType::Directory => "directory",
            BadType::Unknown => "unknown",
        })
    }
}
64 65
65 66 /// Was explicitly matched but cannot be found/accessed
66 67 #[derive(Debug, Copy, Clone)]
67 68 pub enum BadMatch {
68 69 OsError(i32),
69 70 BadType(BadType),
70 71 }
71 72
72 73 /// Enum used to dispatch new status entries into the right collections.
73 74 /// Is similar to `crate::EntryState`, but represents the transient state of
74 75 /// entries during the lifetime of a command.
75 76 #[derive(Debug, Copy, Clone)]
76 77 pub enum Dispatch {
77 78 Unsure,
78 79 Modified,
79 80 Added,
80 81 Removed,
81 82 Deleted,
82 83 Clean,
83 84 Unknown,
84 85 Ignored,
85 86 /// Empty dispatch, the file is not worth listing
86 87 None,
87 88 /// Was explicitly matched but cannot be found/accessed
88 89 Bad(BadMatch),
89 90 Directory {
90 91 /// True if the directory used to be a file in the dmap so we can say
91 92 /// that it's been removed.
92 93 was_file: bool,
93 94 },
94 95 }
95 96
96 97 type IoResult<T> = std::io::Result<T>;
97 98
98 99 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait, 'static>`, so add
99 100 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
100 101 type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
101 102
102 103 /// We have a good mix of owned (from directory traversal) and borrowed (from
103 104 /// the dirstate/explicit) paths, this comes up a lot.
104 type HgPathCow<'a> = Cow<'a, HgPath>;
105 pub type HgPathCow<'a> = Cow<'a, HgPath>;
105 106
106 107 /// A path with its computed ``Dispatch`` information
107 108 type DispatchedPath<'a> = (HgPathCow<'a>, Dispatch);
108 109
/// Dates and times that are outside the 31-bit signed range are compared
/// modulo 2^31. This should prevent hg from behaving badly with very large
/// files or corrupt dates while still having a high probability of detecting
/// changes. (issue2608)
///
/// Returns `true` when `a` and `b` **differ** once their sign bit has been
/// masked off, `false` when they are equal modulo 2^31.
///
/// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
/// is not defined for `i32`, and there is no `As` trait. This forces the
/// caller to cast `b` as `i32`.
fn mod_compare(a: i32, b: i32) -> bool {
    /// Keeps the low 31 bits, i.e. the same value as `i32::max_value()`.
    const LOW_31_BITS: i32 = 0x7fff_ffff;

    (a & LOW_31_BITS) != (b & LOW_31_BITS)
}
119 120
120 121 /// Return a sorted list containing information about the entries
121 122 /// in the directory.
122 123 ///
123 124 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
124 125 fn list_directory(
125 126 path: impl AsRef<Path>,
126 127 skip_dot_hg: bool,
127 128 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
128 129 let mut results = vec![];
129 130 let entries = read_dir(path.as_ref())?;
130 131
131 132 for entry in entries {
132 133 let entry = entry?;
133 134 let filename = os_string_to_hg_path_buf(entry.file_name())?;
134 135 let file_type = entry.file_type()?;
135 136 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
136 137 return Ok(vec![]);
137 138 } else {
138 139 results.push((filename, entry))
139 140 }
140 141 }
141 142
142 143 results.sort_unstable_by_key(|e| e.0.clone());
143 144 Ok(results)
144 145 }
145 146
146 147 /// The file corresponding to the dirstate entry was found on the filesystem.
147 148 fn dispatch_found(
148 149 filename: impl AsRef<HgPath>,
149 150 entry: DirstateEntry,
150 151 metadata: HgMetadata,
151 152 copy_map: &CopyMap,
152 153 options: StatusOptions,
153 154 ) -> Dispatch {
154 155 let DirstateEntry {
155 156 state,
156 157 mode,
157 158 mtime,
158 159 size,
159 160 } = entry;
160 161
161 162 let HgMetadata {
162 163 st_mode,
163 164 st_size,
164 165 st_mtime,
165 166 ..
166 167 } = metadata;
167 168
168 169 match state {
169 170 EntryState::Normal => {
170 171 let size_changed = mod_compare(size, st_size as i32);
171 172 let mode_changed =
172 173 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
173 174 let metadata_changed = size >= 0 && (size_changed || mode_changed);
174 175 let other_parent = size == SIZE_FROM_OTHER_PARENT;
175 176
176 177 if metadata_changed
177 178 || other_parent
178 179 || copy_map.contains_key(filename.as_ref())
179 180 {
180 181 Dispatch::Modified
181 182 } else if mod_compare(mtime, st_mtime as i32)
182 183 || st_mtime == options.last_normal_time
183 184 {
184 185 // the file may have just been marked as normal and
185 186 // it may have changed in the same second without
186 187 // changing its size. This can happen if we quickly
187 188 // do multiple commits. Force lookup, so we don't
188 189 // miss such a racy file change.
189 190 Dispatch::Unsure
190 191 } else if options.list_clean {
191 192 Dispatch::Clean
192 193 } else {
193 194 Dispatch::None
194 195 }
195 196 }
196 197 EntryState::Merged => Dispatch::Modified,
197 198 EntryState::Added => Dispatch::Added,
198 199 EntryState::Removed => Dispatch::Removed,
199 200 EntryState::Unknown => Dispatch::Unknown,
200 201 }
201 202 }
202 203
203 204 /// The file corresponding to this Dirstate entry is missing.
204 205 fn dispatch_missing(state: EntryState) -> Dispatch {
205 206 match state {
206 207 // File was removed from the filesystem during commands
207 208 EntryState::Normal | EntryState::Merged | EntryState::Added => {
208 209 Dispatch::Deleted
209 210 }
210 211 // File was removed, everything is normal
211 212 EntryState::Removed => Dispatch::Removed,
212 213 // File is unknown to Mercurial, everything is normal
213 214 EntryState::Unknown => Dispatch::Unknown,
214 215 }
215 216 }
216 217
217 218 lazy_static! {
218 219 static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
219 220 let mut h = HashSet::new();
220 221 h.insert(HgPath::new(b""));
221 222 h
222 223 };
223 224 }
224 225
225 226 #[derive(Debug, Copy, Clone)]
226 227 pub struct StatusOptions {
227 228 /// Remember the most recent modification timeslot for status, to make
228 229 /// sure we won't miss future size-preserving file content modifications
229 230 /// that happen within the same timeslot.
230 231 pub last_normal_time: i64,
231 232 /// Whether we are on a filesystem with UNIX-like exec flags
232 233 pub check_exec: bool,
233 234 pub list_clean: bool,
234 235 pub list_unknown: bool,
235 236 pub list_ignored: bool,
236 237 /// Whether to collect traversed dirs for applying a callback later.
237 238 /// Used by `hg purge` for example.
238 239 pub collect_traversed_dirs: bool,
239 240 }
240 241
241 242 #[derive(Debug)]
242 243 pub struct DirstateStatus<'a> {
243 244 pub modified: Vec<HgPathCow<'a>>,
244 245 pub added: Vec<HgPathCow<'a>>,
245 246 pub removed: Vec<HgPathCow<'a>>,
246 247 pub deleted: Vec<HgPathCow<'a>>,
247 248 pub clean: Vec<HgPathCow<'a>>,
248 249 pub ignored: Vec<HgPathCow<'a>>,
249 250 pub unknown: Vec<HgPathCow<'a>>,
250 251 pub bad: Vec<(HgPathCow<'a>, BadMatch)>,
251 252 /// Only filled if `collect_traversed_dirs` is `true`
252 253 pub traversed: Vec<HgPathBuf>,
253 254 }
254 255
255 256 #[derive(Debug)]
256 257 pub enum StatusError {
257 258 /// Generic IO error
258 259 IO(std::io::Error),
259 260 /// An invalid path that cannot be represented in Mercurial was found
260 261 Path(HgPathError),
261 262 /// An invalid "ignore" pattern was found
262 263 Pattern(PatternError),
263 264 }
264 265
265 266 pub type StatusResult<T> = Result<T, StatusError>;
266 267
267 268 impl From<PatternError> for StatusError {
268 269 fn from(e: PatternError) -> Self {
269 270 StatusError::Pattern(e)
270 271 }
271 272 }
272 273 impl From<HgPathError> for StatusError {
273 274 fn from(e: HgPathError) -> Self {
274 275 StatusError::Path(e)
275 276 }
276 277 }
277 278 impl From<std::io::Error> for StatusError {
278 279 fn from(e: std::io::Error) -> Self {
279 280 StatusError::IO(e)
280 281 }
281 282 }
282 283
283 284 impl ToString for StatusError {
284 285 fn to_string(&self) -> String {
285 286 match self {
286 287 StatusError::IO(e) => e.to_string(),
287 288 StatusError::Path(e) => e.to_string(),
288 289 StatusError::Pattern(e) => e.to_string(),
289 290 }
290 291 }
291 292 }
292 293
293 294 /// Gives information about which files are changed in the working directory
294 295 /// and how, compared to the revision we're based on
295 296 pub struct Status<'a, M: Matcher + Sync> {
296 297 dmap: &'a DirstateMap,
297 matcher: &'a M,
298 pub(crate) matcher: &'a M,
298 299 root_dir: PathBuf,
299 options: StatusOptions,
300 pub(crate) options: StatusOptions,
300 301 ignore_fn: IgnoreFnType<'a>,
301 302 }
302 303
303 304 impl<'a, M> Status<'a, M>
304 305 where
305 306 M: Matcher + Sync,
306 307 {
307 308 pub fn new(
308 309 dmap: &'a DirstateMap,
309 310 matcher: &'a M,
310 311 root_dir: PathBuf,
311 312 ignore_files: Vec<PathBuf>,
312 313 options: StatusOptions,
313 314 ) -> StatusResult<(Self, Vec<PatternFileWarning>)> {
314 315 // Needs to outlive `dir_ignore_fn` since it's captured.
315 316
316 317 let (ignore_fn, warnings): (IgnoreFnType, _) =
317 318 if options.list_ignored || options.list_unknown {
318 319 get_ignore_function(ignore_files, &root_dir)?
319 320 } else {
320 321 (Box::new(|&_| true), vec![])
321 322 };
322 323
323 324 Ok((
324 325 Self {
325 326 dmap,
326 327 matcher,
327 328 root_dir,
328 329 options,
329 330 ignore_fn,
330 331 },
331 332 warnings,
332 333 ))
333 334 }
334 335
335 336 /// Is the path ignored?
336 337 pub fn is_ignored(&self, path: impl AsRef<HgPath>) -> bool {
337 338 (self.ignore_fn)(path.as_ref())
338 339 }
339 340
340 341 /// Is the path or one of its ancestors ignored?
341 342 pub fn dir_ignore(&self, dir: impl AsRef<HgPath>) -> bool {
342 343 // Only involve ignore mechanism if we're listing unknowns or ignored.
343 344 if self.options.list_ignored || self.options.list_unknown {
344 345 if self.is_ignored(&dir) {
345 346 true
346 347 } else {
347 348 for p in find_dirs(dir.as_ref()) {
348 349 if self.is_ignored(p) {
349 350 return true;
350 351 }
351 352 }
352 353 false
353 354 }
354 355 } else {
355 356 true
356 357 }
357 358 }
358 359
359 360 /// Get stat data about the files explicitly specified by the matcher.
360 361 /// Returns a tuple of the directories that need to be traversed and the
361 362 /// files with their corresponding `Dispatch`.
362 363 /// TODO subrepos
363 364 #[timed]
364 365 pub fn walk_explicit(
365 366 &self,
366 367 traversed_sender: crossbeam::Sender<HgPathBuf>,
367 368 ) -> (Vec<DispatchedPath<'a>>, Vec<DispatchedPath<'a>>) {
368 369 self.matcher
369 370 .file_set()
370 371 .unwrap_or(&DEFAULT_WORK)
371 372 .par_iter()
372 373 .map(|&filename| -> Option<IoResult<_>> {
373 374 // TODO normalization
374 375 let normalized = filename;
375 376
376 377 let buf = match hg_path_to_path_buf(normalized) {
377 378 Ok(x) => x,
378 379 Err(e) => return Some(Err(e.into())),
379 380 };
380 381 let target = self.root_dir.join(buf);
381 382 let st = target.symlink_metadata();
382 383 let in_dmap = self.dmap.get(normalized);
383 384 match st {
384 385 Ok(meta) => {
385 386 let file_type = meta.file_type();
386 387 return if file_type.is_file() || file_type.is_symlink()
387 388 {
388 389 if let Some(entry) = in_dmap {
389 390 return Some(Ok((
390 391 Cow::Borrowed(normalized),
391 392 dispatch_found(
392 393 &normalized,
393 394 *entry,
394 395 HgMetadata::from_metadata(meta),
395 396 &self.dmap.copy_map,
396 397 self.options,
397 398 ),
398 399 )));
399 400 }
400 401 Some(Ok((
401 402 Cow::Borrowed(normalized),
402 403 Dispatch::Unknown,
403 404 )))
404 405 } else if file_type.is_dir() {
405 406 if self.options.collect_traversed_dirs {
406 407 traversed_sender
407 408 .send(normalized.to_owned())
408 409 .expect("receiver should outlive sender");
409 410 }
410 411 Some(Ok((
411 412 Cow::Borrowed(normalized),
412 413 Dispatch::Directory {
413 414 was_file: in_dmap.is_some(),
414 415 },
415 416 )))
416 417 } else {
417 418 Some(Ok((
418 419 Cow::Borrowed(normalized),
419 420 Dispatch::Bad(BadMatch::BadType(
420 421 // TODO do more than unknown
421 422 // Support for all `BadType` variant
422 423 // varies greatly between platforms.
423 424 // So far, no tests check the type and
424 425 // this should be good enough for most
425 426 // users.
426 427 BadType::Unknown,
427 428 )),
428 429 )))
429 430 };
430 431 }
431 432 Err(_) => {
432 433 if let Some(entry) = in_dmap {
433 434 return Some(Ok((
434 435 Cow::Borrowed(normalized),
435 436 dispatch_missing(entry.state),
436 437 )));
437 438 }
438 439 }
439 440 };
440 441 None
441 442 })
442 443 .flatten()
443 444 .filter_map(Result::ok)
444 445 .partition(|(_, dispatch)| match dispatch {
445 446 Dispatch::Directory { .. } => true,
446 447 _ => false,
447 448 })
448 449 }
449 450
450 451 /// Walk the working directory recursively to look for changes compared to
451 452 /// the current `DirstateMap`.
452 453 ///
453 454 /// This takes a mutable reference to the results to account for the
454 455 /// `extend` in timings
455 456 #[timed]
456 457 pub fn traverse(
457 458 &self,
458 459 path: impl AsRef<HgPath>,
459 460 old_results: &FastHashMap<HgPathCow<'a>, Dispatch>,
460 461 results: &mut Vec<DispatchedPath<'a>>,
461 462 traversed_sender: crossbeam::Sender<HgPathBuf>,
462 463 ) -> IoResult<()> {
463 464 // The traversal is done in parallel, so use a channel to gather
464 465 // entries. `crossbeam::Sender` is `Sync`, while `mpsc::Sender`
465 466 // is not.
466 467 let (files_transmitter, files_receiver) =
467 468 crossbeam::channel::unbounded();
468 469
469 470 self.traverse_dir(
470 471 &files_transmitter,
471 472 path,
472 473 &old_results,
473 474 traversed_sender,
474 475 )?;
475 476
476 477 // Disconnect the channel so the receiver stops waiting
477 478 drop(files_transmitter);
478 479
479 480 // TODO don't collect. Find a way of replicating the behavior of
480 481 // `itertools::process_results`, but for `rayon::ParallelIterator`
481 482 let new_results: IoResult<Vec<(Cow<HgPath>, Dispatch)>> =
482 483 files_receiver
483 484 .into_iter()
484 485 .map(|item| {
485 486 let (f, d) = item?;
486 487 Ok((Cow::Owned(f), d))
487 488 })
488 489 .collect();
489 490
490 491 results.par_extend(new_results?);
491 492
492 493 Ok(())
493 494 }
494 495
495 496 /// Dispatch a single entry (file, folder, symlink...) found during
496 497 /// `traverse`. If the entry is a folder that needs to be traversed, it
497 498 /// will be handled in a separate thread.
498 499 fn handle_traversed_entry<'b>(
499 500 &'a self,
500 501 scope: &rayon::Scope<'b>,
501 502 files_sender: &'b crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
502 503 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
503 504 filename: HgPathBuf,
504 505 dir_entry: DirEntry,
505 506 traversed_sender: crossbeam::Sender<HgPathBuf>,
506 507 ) -> IoResult<()>
507 508 where
508 509 'a: 'b,
509 510 {
510 511 let file_type = dir_entry.file_type()?;
511 512 let entry_option = self.dmap.get(&filename);
512 513
513 514 if filename.as_bytes() == b".hg" {
514 515 // Could be a directory or a symlink
515 516 return Ok(());
516 517 }
517 518
518 519 if file_type.is_dir() {
519 520 self.handle_traversed_dir(
520 521 scope,
521 522 files_sender,
522 523 old_results,
523 524 entry_option,
524 525 filename,
525 526 traversed_sender,
526 527 );
527 528 } else if file_type.is_file() || file_type.is_symlink() {
528 529 if let Some(entry) = entry_option {
529 530 if self.matcher.matches_everything()
530 531 || self.matcher.matches(&filename)
531 532 {
532 533 let metadata = dir_entry.metadata()?;
533 534 files_sender
534 535 .send(Ok((
535 536 filename.to_owned(),
536 537 dispatch_found(
537 538 &filename,
538 539 *entry,
539 540 HgMetadata::from_metadata(metadata),
540 541 &self.dmap.copy_map,
541 542 self.options,
542 543 ),
543 544 )))
544 545 .unwrap();
545 546 }
546 547 } else if (self.matcher.matches_everything()
547 548 || self.matcher.matches(&filename))
548 549 && !self.is_ignored(&filename)
549 550 {
550 551 if (self.options.list_ignored
551 552 || self.matcher.exact_match(&filename))
552 553 && self.dir_ignore(&filename)
553 554 {
554 555 if self.options.list_ignored {
555 556 files_sender
556 557 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
557 558 .unwrap();
558 559 }
559 560 } else if self.options.list_unknown {
560 561 files_sender
561 562 .send(Ok((filename.to_owned(), Dispatch::Unknown)))
562 563 .unwrap();
563 564 }
564 565 } else if self.is_ignored(&filename) && self.options.list_ignored {
565 566 files_sender
566 567 .send(Ok((filename.to_owned(), Dispatch::Ignored)))
567 568 .unwrap();
568 569 }
569 570 } else if let Some(entry) = entry_option {
570 571 // Used to be a file or a folder, now something else.
571 572 if self.matcher.matches_everything()
572 573 || self.matcher.matches(&filename)
573 574 {
574 575 files_sender
575 576 .send(Ok((
576 577 filename.to_owned(),
577 578 dispatch_missing(entry.state),
578 579 )))
579 580 .unwrap();
580 581 }
581 582 }
582 583
583 584 Ok(())
584 585 }
585 586
586 587 /// A directory was found in the filesystem and needs to be traversed
587 588 fn handle_traversed_dir<'b>(
588 589 &'a self,
589 590 scope: &rayon::Scope<'b>,
590 591 files_sender: &'b crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
591 592 old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
592 593 entry_option: Option<&'a DirstateEntry>,
593 594 directory: HgPathBuf,
594 595 traversed_sender: crossbeam::Sender<HgPathBuf>,
595 596 ) where
596 597 'a: 'b,
597 598 {
598 599 scope.spawn(move |_| {
599 600 // Nested `if` until `rust-lang/rust#53668` is stable
600 601 if let Some(entry) = entry_option {
601 602 // Used to be a file, is now a folder
602 603 if self.matcher.matches_everything()
603 604 || self.matcher.matches(&directory)
604 605 {
605 606 files_sender
606 607 .send(Ok((
607 608 directory.to_owned(),
608 609 dispatch_missing(entry.state),
609 610 )))
610 611 .unwrap();
611 612 }
612 613 }
613 614 // Do we need to traverse it?
614 615 if !self.is_ignored(&directory) || self.options.list_ignored {
615 616 self.traverse_dir(
616 617 files_sender,
617 618 directory,
618 619 &old_results,
619 620 traversed_sender,
620 621 )
621 622 .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
622 623 }
623 624 });
624 625 }
625 626
626 627 /// Decides whether the directory needs to be listed, and if so handles the
627 628 /// entries in a separate thread.
628 629 fn traverse_dir(
629 630 &self,
630 631 files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
631 632 directory: impl AsRef<HgPath>,
632 633 old_results: &FastHashMap<Cow<HgPath>, Dispatch>,
633 634 traversed_sender: crossbeam::Sender<HgPathBuf>,
634 635 ) -> IoResult<()> {
635 636 let directory = directory.as_ref();
636 637
637 638 if self.options.collect_traversed_dirs {
638 639 traversed_sender
639 640 .send(directory.to_owned())
640 641 .expect("receiver should outlive sender");
641 642 }
642 643
643 644 let visit_entries = match self.matcher.visit_children_set(directory) {
644 645 VisitChildrenSet::Empty => return Ok(()),
645 646 VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
646 647 VisitChildrenSet::Set(set) => Some(set),
647 648 };
648 649 let buf = hg_path_to_path_buf(directory)?;
649 650 let dir_path = self.root_dir.join(buf);
650 651
651 652 let skip_dot_hg = !directory.as_bytes().is_empty();
652 653 let entries = match list_directory(dir_path, skip_dot_hg) {
653 654 Err(e) => {
654 655 return match e.kind() {
655 656 ErrorKind::NotFound | ErrorKind::PermissionDenied => {
656 657 files_sender
657 658 .send(Ok((
658 659 directory.to_owned(),
659 660 Dispatch::Bad(BadMatch::OsError(
660 661 // Unwrapping here is OK because the error
661 662 // always is a
662 663 // real os error
663 664 e.raw_os_error().unwrap(),
664 665 )),
665 666 )))
666 667 .expect("receiver should outlive sender");
667 668 Ok(())
668 669 }
669 670 _ => Err(e),
670 671 };
671 672 }
672 673 Ok(entries) => entries,
673 674 };
674 675
675 676 rayon::scope(|scope| -> IoResult<()> {
676 677 for (filename, dir_entry) in entries {
677 678 if let Some(ref set) = visit_entries {
678 679 if !set.contains(filename.deref()) {
679 680 continue;
680 681 }
681 682 }
682 683 // TODO normalize
683 684 let filename = if directory.is_empty() {
684 685 filename.to_owned()
685 686 } else {
686 687 directory.join(&filename)
687 688 };
688 689
689 690 if !old_results.contains_key(filename.deref()) {
690 691 self.handle_traversed_entry(
691 692 scope,
692 693 files_sender,
693 694 old_results,
694 695 filename,
695 696 dir_entry,
696 697 traversed_sender.clone(),
697 698 )?;
698 699 }
699 700 }
700 701 Ok(())
701 702 })
702 703 }
703 704
704 705 /// Checks all files that are in the dirstate but were not found during the
705 706 /// working directory traversal. This means that the rest must
706 707 /// be either ignored, under a symlink or under a new nested repo.
707 708 ///
708 709 /// This takes a mutable reference to the results to account for the
709 710 /// `extend` in timings
710 711 #[timed]
711 fn handle_unknowns(
712 pub fn handle_unknowns(
712 713 &self,
713 714 results: &mut Vec<DispatchedPath<'a>>,
714 715 ) -> IoResult<()> {
715 716 let to_visit: Vec<(&HgPath, &DirstateEntry)> =
716 717 if results.is_empty() && self.matcher.matches_everything() {
717 718 self.dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
718 719 } else {
719 720 // Only convert to a hashmap if needed.
720 721 let old_results: FastHashMap<_, _> =
721 722 results.iter().cloned().collect();
722 723 self.dmap
723 724 .iter()
724 725 .filter_map(move |(f, e)| {
725 726 if !old_results.contains_key(f.deref())
726 727 && self.matcher.matches(f)
727 728 {
728 729 Some((f.deref(), e))
729 730 } else {
730 731 None
731 732 }
732 733 })
733 734 .collect()
734 735 };
735 736
736 737 let path_auditor = PathAuditor::new(&self.root_dir);
737 738
738 739 // TODO don't collect. Find a way of replicating the behavior of
739 740 // `itertools::process_results`, but for `rayon::ParallelIterator`
740 741 let new_results: IoResult<Vec<_>> = to_visit
741 742 .into_par_iter()
742 743 .filter_map(|(filename, entry)| -> Option<IoResult<_>> {
743 744 // Report ignored items in the dmap as long as they are not
744 745 // under a symlink directory.
745 746 if path_auditor.check(filename) {
746 747 // TODO normalize for case-insensitive filesystems
747 748 let buf = match hg_path_to_path_buf(filename) {
748 749 Ok(x) => x,
749 750 Err(e) => return Some(Err(e.into())),
750 751 };
751 752 Some(Ok((
752 753 Cow::Borrowed(filename),
753 754 match self.root_dir.join(&buf).symlink_metadata() {
754 755 // File was just ignored, no links, and exists
755 756 Ok(meta) => {
756 757 let metadata = HgMetadata::from_metadata(meta);
757 758 dispatch_found(
758 759 filename,
759 760 *entry,
760 761 metadata,
761 762 &self.dmap.copy_map,
762 763 self.options,
763 764 )
764 765 }
765 766 // File doesn't exist
766 767 Err(_) => dispatch_missing(entry.state),
767 768 },
768 769 )))
769 770 } else {
770 771 // It's either missing or under a symlink directory which
771 772 // we, in this case, report as missing.
772 773 Some(Ok((
773 774 Cow::Borrowed(filename),
774 775 dispatch_missing(entry.state),
775 776 )))
776 777 }
777 778 })
778 779 .collect();
779 780
780 781 results.par_extend(new_results?);
781 782
782 783 Ok(())
783 784 }
784 785
785 786 /// Add the files in the dirstate to the results.
786 787 ///
787 788 /// This takes a mutable reference to the results to account for the
788 789 /// `extend` in timings
789 790 #[timed]
790 fn extend_from_dmap(&self, results: &mut Vec<DispatchedPath<'a>>) {
791 pub fn extend_from_dmap(&self, results: &mut Vec<DispatchedPath<'a>>) {
791 792 results.par_extend(self.dmap.par_iter().flat_map(
792 793 move |(filename, entry)| {
793 794 let filename: &HgPath = filename;
794 795 let filename_as_path = hg_path_to_path_buf(filename)?;
795 796 let meta =
796 797 self.root_dir.join(filename_as_path).symlink_metadata();
797 798
798 799 match meta {
799 800 Ok(ref m)
800 801 if !(m.file_type().is_file()
801 802 || m.file_type().is_symlink()) =>
802 803 {
803 804 Ok((
804 805 Cow::Borrowed(filename),
805 806 dispatch_missing(entry.state),
806 807 ))
807 808 }
808 809 Ok(m) => Ok((
809 810 Cow::Borrowed(filename),
810 811 dispatch_found(
811 812 filename,
812 813 *entry,
813 814 HgMetadata::from_metadata(m),
814 815 &self.dmap.copy_map,
815 816 self.options,
816 817 ),
817 818 )),
818 819 Err(ref e)
819 820 if e.kind() == ErrorKind::NotFound
820 821 || e.raw_os_error() == Some(20) =>
821 822 {
822 823 // Rust does not yet have an `ErrorKind` for
823 824 // `NotADirectory` (errno 20)
824 825 // It happens if the dirstate contains `foo/bar`
825 826 // and foo is not a
826 827 // directory
827 828 Ok((
828 829 Cow::Borrowed(filename),
829 830 dispatch_missing(entry.state),
830 831 ))
831 832 }
832 833 Err(e) => Err(e),
833 834 }
834 835 },
835 836 ));
836 837 }
837 838 }
838 839
839 840 #[timed]
840 fn build_response<'a>(
841 pub fn build_response<'a>(
841 842 results: impl IntoIterator<Item = DispatchedPath<'a>>,
842 843 traversed: Vec<HgPathBuf>,
843 844 ) -> (Vec<HgPathCow<'a>>, DirstateStatus<'a>) {
844 845 let mut lookup = vec![];
845 846 let mut modified = vec![];
846 847 let mut added = vec![];
847 848 let mut removed = vec![];
848 849 let mut deleted = vec![];
849 850 let mut clean = vec![];
850 851 let mut ignored = vec![];
851 852 let mut unknown = vec![];
852 853 let mut bad = vec![];
853 854
854 855 for (filename, dispatch) in results.into_iter() {
855 856 match dispatch {
856 857 Dispatch::Unknown => unknown.push(filename),
857 858 Dispatch::Unsure => lookup.push(filename),
858 859 Dispatch::Modified => modified.push(filename),
859 860 Dispatch::Added => added.push(filename),
860 861 Dispatch::Removed => removed.push(filename),
861 862 Dispatch::Deleted => deleted.push(filename),
862 863 Dispatch::Clean => clean.push(filename),
863 864 Dispatch::Ignored => ignored.push(filename),
864 865 Dispatch::None => {}
865 866 Dispatch::Bad(reason) => bad.push((filename, reason)),
866 867 Dispatch::Directory { .. } => {}
867 868 }
868 869 }
869 870
870 871 (
871 872 lookup,
872 873 DirstateStatus {
873 874 modified,
874 875 added,
875 876 removed,
876 877 deleted,
877 878 clean,
878 879 ignored,
879 880 unknown,
880 881 bad,
881 882 traversed,
882 883 },
883 884 )
884 885 }
885 886
886 887 /// Get the status of files in the working directory.
887 888 ///
888 889 /// This is the current entry-point for `hg-core` and is realistically unusable
889 890 /// outside of a Python context because its arguments need to provide a lot of
890 891 /// information that will not be necessary in the future.
891 892 #[timed]
892 893 pub fn status<'a>(
893 894 dmap: &'a DirstateMap,
894 895 matcher: &'a (impl Matcher + Sync),
895 896 root_dir: PathBuf,
896 897 ignore_files: Vec<PathBuf>,
897 898 options: StatusOptions,
898 899 ) -> StatusResult<(
899 900 (Vec<HgPathCow<'a>>, DirstateStatus<'a>),
900 901 Vec<PatternFileWarning>,
901 902 )> {
902 let (traversed_sender, traversed_receiver) =
903 crossbeam::channel::unbounded();
904 let (st, warnings) =
903 let (status, warnings) =
905 904 Status::new(dmap, matcher, root_dir, ignore_files, options)?;
906 905
907 // Step 1: check the files explicitly mentioned by the user
908 let (work, mut results) = st.walk_explicit(traversed_sender.clone());
909
910 if !work.is_empty() {
911 // Hashmaps are quite a bit slower to build than vecs, so only build it
912 // if needed.
913 let old_results = results.iter().cloned().collect();
914
915 // Step 2: recursively check the working directory for changes if
916 // needed
917 for (dir, dispatch) in work {
918 match dispatch {
919 Dispatch::Directory { was_file } => {
920 if was_file {
921 results.push((dir.to_owned(), Dispatch::Removed));
906 Ok((status.run()?, warnings))
922 907 }
923 if options.list_ignored
924 || options.list_unknown && !st.dir_ignore(&dir)
925 {
926 st.traverse(
927 &dir,
928 &old_results,
929 &mut results,
930 traversed_sender.clone(),
931 )?;
932 }
933 }
934 _ => unreachable!("There can only be directories in `work`"),
935 }
936 }
937 }
938
939 if !matcher.is_exact() {
940 if options.list_unknown {
941 st.handle_unknowns(&mut results)?;
942 } else {
943 // TODO this is incorrect, see issue6335
944 // This requires a fix in both Python and Rust that can happen
945 // with other pending changes to `status`.
946 st.extend_from_dmap(&mut results);
947 }
948 }
949
950 drop(traversed_sender);
951 let traversed = traversed_receiver.into_iter().collect();
952
953 Ok((build_response(results, traversed), warnings))
954 }
@@ -1,12 +1,13 b''
1 mod dirstate_status;
1 2 mod find_root;
2 3 pub use find_root::{FindRoot, FindRootError, FindRootErrorKind};
3 4
/// An interface for high-level hg operations.
///
/// A distinction is made between operation and commands.
/// An operation is what can be done whereas a command is what is exposed by
/// the cli. A single command can use several operations to achieve its goal.
pub trait Operation<T> {
    /// Error returned when the operation fails.
    type Error;

    /// Runs the operation and returns its result.
    fn run(&self) -> Result<T, Self::Error>;
}
General Comments 0
You need to be logged in to leave comments. Login now