##// END OF EJS Templates
rust-status: add function for sequential traversal of the working directory...
Raphaël Gomès -
r45014:1debb589 default
parent child Browse files
Show More
@@ -1,361 +1,536 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust implementation of dirstate.status (dirstate.py).
9 9 //! It is currently missing a lot of functionality compared to the Python one
10 10 //! and will only be triggered in narrow cases.
11 11
12 12 use crate::{
13 13 dirstate::SIZE_FROM_OTHER_PARENT,
14 matchers::Matcher,
14 matchers::{Matcher, VisitChildrenSet},
15 15 utils::{
16 16 files::HgMetadata,
17 17 hg_path::{
18 18 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
19 19 },
20 20 },
21 CopyMap, DirstateEntry, DirstateMap, EntryState,
21 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
22 22 };
23 23 use rayon::prelude::*;
24 use std::collections::HashSet;
24 use std::borrow::Cow;
25 use std::collections::{HashSet, VecDeque};
25 26 use std::fs::{read_dir, DirEntry};
27 use std::io::ErrorKind;
28 use std::ops::Deref;
26 29 use std::path::Path;
27 30
/// Kind of filesystem object that made an explicitly matched path a
/// `BadMatch`, i.e. something that is not of a supported file type.
///
/// Note: several of these variants do not exist on every platform.
#[derive(Debug)]
pub enum BadType {
    CharacterDevice,
    BlockDevice,
    FIFO,
    Socket,
    Directory,
    /// None of the other variants apply
    Unknown,
}
39 42
40 43 /// Was explicitly matched but cannot be found/accessed
41 44 #[derive(Debug)]
42 45 pub enum BadMatch {
43 46 OsError(i32),
44 47 BadType(BadType),
45 48 }
46 49
47 50 /// Marker enum used to dispatch new status entries into the right collections.
48 51 /// Is similar to `crate::EntryState`, but represents the transient state of
49 52 /// entries during the lifetime of a command.
50 53 enum Dispatch {
51 54 Unsure,
52 55 Modified,
53 56 Added,
54 57 Removed,
55 58 Deleted,
56 59 Clean,
57 60 Unknown,
58 61 Ignored,
59 62 /// Empty dispatch, the file is not worth listing
60 63 None,
61 64 /// Was explicitly matched but cannot be found/accessed
62 65 Bad(BadMatch),
63 66 Directory {
64 67 /// True if the directory used to be a file in the dmap so we can say
65 68 /// that it's been removed.
66 69 was_file: bool,
67 70 },
68 71 }
69 72
/// Shorthand for results whose error type is `std::io::Error`.
type IoResult<T> = Result<T, std::io::Error>;
71 74
/// Returns `true` when `a` and `b` DIFFER once both are reduced modulo 2^31.
///
/// Dates and times that are outside the 31-bit signed range are compared
/// modulo 2^31. This should prevent hg from behaving badly with very large
/// files or corrupt dates while still having a high probability of detecting
/// changes. (issue2608)
/// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
/// is not defined for `i32`, and there is no `As` trait. This forces the
/// caller to cast `b` as `i32`.
fn mod_compare(a: i32, b: i32) -> bool {
    // Keeps the low 31 bits, i.e. drops the sign bit (same as `i32::MAX`)
    const LOW_31_BITS: i32 = 0x7fff_ffff;
    (a & LOW_31_BITS) != (b & LOW_31_BITS)
}
82 85
83 86 /// Return a sorted list containing information about the entries
84 87 /// in the directory.
85 88 ///
86 89 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
87 90 fn list_directory(
88 91 path: impl AsRef<Path>,
89 92 skip_dot_hg: bool,
90 93 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
91 94 let mut results = vec![];
92 95 let entries = read_dir(path.as_ref())?;
93 96
94 97 for entry in entries {
95 98 let entry = entry?;
96 99 let filename = os_string_to_hg_path_buf(entry.file_name())?;
97 100 let file_type = entry.file_type()?;
98 101 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
99 102 return Ok(vec![]);
100 103 } else {
101 104 results.push((HgPathBuf::from(filename), entry))
102 105 }
103 106 }
104 107
105 108 results.sort_unstable_by_key(|e| e.0.clone());
106 109 Ok(results)
107 110 }
108 111
109 112 /// The file corresponding to the dirstate entry was found on the filesystem.
110 113 fn dispatch_found(
111 114 filename: impl AsRef<HgPath>,
112 115 entry: DirstateEntry,
113 116 metadata: HgMetadata,
114 117 copy_map: &CopyMap,
115 118 options: StatusOptions,
116 119 ) -> Dispatch {
117 120 let DirstateEntry {
118 121 state,
119 122 mode,
120 123 mtime,
121 124 size,
122 125 } = entry;
123 126
124 127 let HgMetadata {
125 128 st_mode,
126 129 st_size,
127 130 st_mtime,
128 131 ..
129 132 } = metadata;
130 133
131 134 match state {
132 135 EntryState::Normal => {
133 136 let size_changed = mod_compare(size, st_size as i32);
134 137 let mode_changed =
135 138 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
136 139 let metadata_changed = size >= 0 && (size_changed || mode_changed);
137 140 let other_parent = size == SIZE_FROM_OTHER_PARENT;
138 141 if metadata_changed
139 142 || other_parent
140 143 || copy_map.contains_key(filename.as_ref())
141 144 {
142 145 Dispatch::Modified
143 146 } else if mod_compare(mtime, st_mtime as i32) {
144 147 Dispatch::Unsure
145 148 } else if st_mtime == options.last_normal_time {
146 149 // the file may have just been marked as normal and
147 150 // it may have changed in the same second without
148 151 // changing its size. This can happen if we quickly
149 152 // do multiple commits. Force lookup, so we don't
150 153 // miss such a racy file change.
151 154 Dispatch::Unsure
152 155 } else if options.list_clean {
153 156 Dispatch::Clean
154 157 } else {
155 158 Dispatch::Unknown
156 159 }
157 160 }
158 161 EntryState::Merged => Dispatch::Modified,
159 162 EntryState::Added => Dispatch::Added,
160 163 EntryState::Removed => Dispatch::Removed,
161 164 EntryState::Unknown => Dispatch::Unknown,
162 165 }
163 166 }
164 167
165 168 /// The file corresponding to this Dirstate entry is missing.
166 169 fn dispatch_missing(state: EntryState) -> Dispatch {
167 170 match state {
168 171 // File was removed from the filesystem during commands
169 172 EntryState::Normal | EntryState::Merged | EntryState::Added => {
170 173 Dispatch::Deleted
171 174 }
172 175 // File was removed, everything is normal
173 176 EntryState::Removed => Dispatch::Removed,
174 177 // File is unknown to Mercurial, everything is normal
175 178 EntryState::Unknown => Dispatch::Unknown,
176 179 }
177 180 }
178 181
179 182 /// Get stat data about the files explicitly specified by match.
180 183 /// TODO subrepos
181 184 fn walk_explicit<'a>(
182 185 files: &'a HashSet<&HgPath>,
183 186 dmap: &'a DirstateMap,
184 187 root_dir: impl AsRef<Path> + Sync + Send,
185 188 options: StatusOptions,
186 189 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
187 190 files.par_iter().filter_map(move |filename| {
188 191 // TODO normalization
189 192 let normalized = filename.as_ref();
190 193
191 194 let buf = match hg_path_to_path_buf(normalized) {
192 195 Ok(x) => x,
193 196 Err(e) => return Some(Err(e.into())),
194 197 };
195 198 let target = root_dir.as_ref().join(buf);
196 199 let st = target.symlink_metadata();
197 200 match st {
198 201 Ok(meta) => {
199 202 let file_type = meta.file_type();
200 203 if file_type.is_file() || file_type.is_symlink() {
201 204 if let Some(entry) = dmap.get(normalized) {
202 205 return Some(Ok((
203 206 normalized,
204 207 dispatch_found(
205 208 &normalized,
206 209 *entry,
207 210 HgMetadata::from_metadata(meta),
208 211 &dmap.copy_map,
209 212 options,
210 213 ),
211 214 )));
212 215 }
213 216 } else {
214 217 if dmap.contains_key(normalized) {
215 218 return Some(Ok((normalized, Dispatch::Removed)));
216 219 }
217 220 }
218 221 }
219 222 Err(_) => {
220 223 if let Some(entry) = dmap.get(normalized) {
221 224 return Some(Ok((
222 225 normalized,
223 226 dispatch_missing(entry.state),
224 227 )));
225 228 }
226 229 }
227 230 };
228 231 None
229 232 })
230 233 }
231 234
/// Options driving a status computation; cheap to copy around.
#[derive(Debug, Copy, Clone)]
pub struct StatusOptions {
    /// Remember the most recent modification timeslot for status, to make
    /// sure we won't miss future size-preserving file content modifications
    /// that happen within the same timeslot.
    pub last_normal_time: i64,
    /// Whether we are on a filesystem with UNIX-like exec flags
    pub check_exec: bool,
    /// Whether clean files should be dispatched as such
    pub list_clean: bool,
    /// Presumably whether unknown files should be listed
    /// NOTE(review): not read by any code visible here - confirm with callers
    pub list_unknown: bool,
    /// Whether ignored files should be dispatched as such (this also makes
    /// the traversal enter ignored directories)
    pub list_ignored: bool,
}
247
248 /// Dispatch a single file found during `traverse`.
249 /// If `file` is a folder that needs to be traversed, it will be pushed into
250 /// `work`.
251 fn traverse_worker<'a>(
252 work: &mut VecDeque<HgPathBuf>,
253 matcher: &impl Matcher,
254 dmap: &DirstateMap,
255 filename: impl AsRef<HgPath>,
256 dir_entry: &DirEntry,
257 ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
258 dir_ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
259 options: StatusOptions,
260 ) -> Option<IoResult<(Cow<'a, HgPath>, Dispatch)>> {
261 let file_type = match dir_entry.file_type() {
262 Ok(x) => x,
263 Err(e) => return Some(Err(e.into())),
264 };
265 let filename = filename.as_ref();
266 let entry_option = dmap.get(filename);
267
268 if file_type.is_dir() {
269 // Do we need to traverse it?
270 if !ignore_fn(&filename) || options.list_ignored {
271 work.push_front(filename.to_owned());
272 }
273 // Nested `if` until `rust-lang/rust#53668` is stable
274 if let Some(entry) = entry_option {
275 // Used to be a file, is now a folder
276 if matcher.matches_everything() || matcher.matches(&filename) {
277 return Some(Ok((
278 Cow::Owned(filename.to_owned()),
279 dispatch_missing(entry.state),
280 )));
281 }
282 }
283 } else if file_type.is_file() || file_type.is_symlink() {
284 if let Some(entry) = entry_option {
285 if matcher.matches_everything() || matcher.matches(&filename) {
286 let metadata = match dir_entry.metadata() {
287 Ok(x) => x,
288 Err(e) => return Some(Err(e.into())),
289 };
290 return Some(Ok((
291 Cow::Owned(filename.to_owned()),
292 dispatch_found(
293 &filename,
294 *entry,
295 HgMetadata::from_metadata(metadata),
296 &dmap.copy_map,
297 options,
298 ),
299 )));
300 }
301 } else if (matcher.matches_everything() || matcher.matches(&filename))
302 && !ignore_fn(&filename)
303 {
304 if (options.list_ignored || matcher.exact_match(&filename))
305 && dir_ignore_fn(&filename)
306 {
307 if options.list_ignored {
308 return Some(Ok((
309 Cow::Owned(filename.to_owned()),
310 Dispatch::Ignored,
311 )));
312 }
313 } else {
314 return Some(Ok((
315 Cow::Owned(filename.to_owned()),
316 Dispatch::Unknown,
317 )));
318 }
319 }
320 } else if let Some(entry) = entry_option {
321 // Used to be a file or a folder, now something else.
322 if matcher.matches_everything() || matcher.matches(&filename) {
323 return Some(Ok((
324 Cow::Owned(filename.to_owned()),
325 dispatch_missing(entry.state),
326 )));
327 }
328 }
329 None
330 }
331
332 /// Walk the working directory recursively to look for changes compared to the
333 /// current `DirstateMap`.
334 fn traverse<'a>(
335 matcher: &(impl Matcher + Sync),
336 root_dir: impl AsRef<Path>,
337 dmap: &DirstateMap,
338 path: impl AsRef<HgPath>,
339 old_results: FastHashMap<Cow<'a, HgPath>, Dispatch>,
340 ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
341 dir_ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
342 options: StatusOptions,
343 ) -> IoResult<FastHashMap<Cow<'a, HgPath>, Dispatch>> {
344 let root_dir = root_dir.as_ref();
345 let mut new_results = FastHashMap::default();
346
347 let mut work = VecDeque::new();
348 work.push_front(path.as_ref().to_owned());
349
350 while let Some(ref directory) = work.pop_front() {
351 if directory.as_bytes() == b".hg" {
352 continue;
353 }
354 let visit_entries = match matcher.visit_children_set(directory) {
355 VisitChildrenSet::Empty => continue,
356 VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
357 VisitChildrenSet::Set(set) => Some(set),
358 };
359 let buf = hg_path_to_path_buf(directory)?;
360 let dir_path = root_dir.join(buf);
361
362 let skip_dot_hg = !directory.as_bytes().is_empty();
363 let entries = match list_directory(dir_path, skip_dot_hg) {
364 Err(e) => match e.kind() {
365 ErrorKind::NotFound | ErrorKind::PermissionDenied => {
366 new_results.insert(
367 Cow::Owned(directory.to_owned()),
368 Dispatch::Bad(BadMatch::OsError(
369 // Unwrapping here is OK because the error always
370 // is a real os error
371 e.raw_os_error().unwrap(),
372 )),
373 );
374 continue;
375 }
376 _ => return Err(e),
377 },
378 Ok(entries) => entries,
379 };
380
381 for (filename, dir_entry) in entries {
382 if let Some(ref set) = visit_entries {
383 if !set.contains(filename.deref()) {
384 continue;
385 }
386 }
387 // TODO normalize
388 let filename = if directory.is_empty() {
389 filename.to_owned()
390 } else {
391 directory.join(&filename)
392 };
393
394 if !old_results.contains_key(filename.deref()) {
395 if let Some((res, dispatch)) = traverse_worker(
396 &mut work,
397 matcher,
398 &dmap,
399 &filename,
400 &dir_entry,
401 &ignore_fn,
402 &dir_ignore_fn,
403 options,
404 )
405 .transpose()?
406 {
407 new_results.insert(res, dispatch);
408 }
409 }
410 }
411 }
412
413 new_results.extend(old_results.into_iter());
414
415 Ok(new_results)
241 416 }
242 417
243 418 /// Stat all entries in the `DirstateMap` and mark them for dispatch into
244 419 /// the relevant collections.
245 420 fn stat_dmap_entries(
246 421 dmap: &DirstateMap,
247 422 root_dir: impl AsRef<Path> + Sync + Send,
248 423 options: StatusOptions,
249 424 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
250 425 dmap.par_iter().map(move |(filename, entry)| {
251 426 let filename: &HgPath = filename;
252 427 let filename_as_path = hg_path_to_path_buf(filename)?;
253 428 let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();
254 429
255 430 match meta {
256 431 Ok(ref m)
257 432 if !(m.file_type().is_file()
258 433 || m.file_type().is_symlink()) =>
259 434 {
260 435 Ok((filename, dispatch_missing(entry.state)))
261 436 }
262 437 Ok(m) => Ok((
263 438 filename,
264 439 dispatch_found(
265 440 filename,
266 441 *entry,
267 442 HgMetadata::from_metadata(m),
268 443 &dmap.copy_map,
269 444 options,
270 445 ),
271 446 )),
272 447 Err(ref e)
273 448 if e.kind() == std::io::ErrorKind::NotFound
274 449 || e.raw_os_error() == Some(20) =>
275 450 {
276 451 // Rust does not yet have an `ErrorKind` for
277 452 // `NotADirectory` (errno 20)
278 453 // It happens if the dirstate contains `foo/bar` and
279 454 // foo is not a directory
280 455 Ok((filename, dispatch_missing(entry.state)))
281 456 }
282 457 Err(e) => Err(e),
283 458 }
284 459 })
285 460 }
286 461
287 462 pub struct DirstateStatus<'a> {
288 463 pub modified: Vec<&'a HgPath>,
289 464 pub added: Vec<&'a HgPath>,
290 465 pub removed: Vec<&'a HgPath>,
291 466 pub deleted: Vec<&'a HgPath>,
292 467 pub clean: Vec<&'a HgPath>,
293 468 pub ignored: Vec<&'a HgPath>,
294 469 pub unknown: Vec<&'a HgPath>,
295 470 pub bad: Vec<(&'a HgPath, BadMatch)>,
296 471 }
297 472
298 473 fn build_response<'a>(
299 474 results: impl IntoIterator<Item = IoResult<(&'a HgPath, Dispatch)>>,
300 475 ) -> IoResult<(Vec<&'a HgPath>, DirstateStatus<'a>)> {
301 476 let mut lookup = vec![];
302 477 let mut modified = vec![];
303 478 let mut added = vec![];
304 479 let mut removed = vec![];
305 480 let mut deleted = vec![];
306 481 let mut clean = vec![];
307 482 let mut ignored = vec![];
308 483 let mut unknown = vec![];
309 484 let mut bad = vec![];
310 485
311 486 for res in results.into_iter() {
312 487 let (filename, dispatch) = res?;
313 488 match dispatch {
314 489 Dispatch::Unknown => unknown.push(filename),
315 490 Dispatch::Unsure => lookup.push(filename),
316 491 Dispatch::Modified => modified.push(filename),
317 492 Dispatch::Added => added.push(filename),
318 493 Dispatch::Removed => removed.push(filename),
319 494 Dispatch::Deleted => deleted.push(filename),
320 495 Dispatch::Clean => clean.push(filename),
321 496 Dispatch::Ignored => ignored.push(filename),
322 497 Dispatch::None => {}
323 498 Dispatch::Bad(reason) => bad.push((filename, reason)),
324 499 Dispatch::Directory { .. } => {}
325 500 }
326 501 }
327 502
328 503 Ok((
329 504 lookup,
330 505 DirstateStatus {
331 506 modified,
332 507 added,
333 508 removed,
334 509 deleted,
335 510 clean,
336 511 ignored,
337 512 unknown,
338 513 bad,
339 514 },
340 515 ))
341 516 }
342 517
343 518 pub fn status<'a: 'c, 'b: 'c, 'c>(
344 519 dmap: &'a DirstateMap,
345 520 matcher: &'b impl Matcher,
346 521 root_dir: impl AsRef<Path> + Sync + Send + Copy,
347 522 options: StatusOptions,
348 523 ) -> IoResult<(Vec<&'c HgPath>, DirstateStatus<'c>)> {
349 524 let files = matcher.file_set();
350 525 let mut results = vec![];
351 526 if let Some(files) = files {
352 527 results.par_extend(walk_explicit(&files, &dmap, root_dir, options));
353 528 }
354 529
355 530 if !matcher.is_exact() {
356 531 let stat_results = stat_dmap_entries(&dmap, root_dir, options);
357 532 results.par_extend(stat_results);
358 533 }
359 534
360 535 build_response(results)
361 536 }
General Comments 0
You need to be logged in to leave comments. Login now