##// END OF EJS Templates
dirstate-tree: Skip readdir() in `hg status -mard`...
Simon Sapin -
r48129:f27f2afb default
parent child Browse files
Show More
@@ -1,433 +1,466 b''
1 1 use crate::dirstate::status::IgnoreFnType;
2 2 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
3 3 use crate::dirstate_tree::dirstate_map::DirstateMap;
4 4 use crate::dirstate_tree::dirstate_map::NodeRef;
5 5 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
6 6 use crate::matchers::get_ignore_function;
7 7 use crate::matchers::Matcher;
8 8 use crate::utils::files::get_bytes_from_os_string;
9 use crate::utils::files::get_path_from_bytes;
9 10 use crate::utils::hg_path::HgPath;
10 11 use crate::BadMatch;
11 12 use crate::DirstateStatus;
12 13 use crate::EntryState;
13 14 use crate::HgPathBuf;
14 15 use crate::PatternFileWarning;
15 16 use crate::StatusError;
16 17 use crate::StatusOptions;
17 18 use micro_timer::timed;
18 19 use rayon::prelude::*;
19 20 use std::borrow::Cow;
20 21 use std::io;
21 22 use std::path::Path;
22 23 use std::path::PathBuf;
23 24 use std::sync::Mutex;
24 25
25 26 /// Returns the status of the working directory compared to its parent
26 27 /// changeset.
27 28 ///
28 29 /// This algorithm is based on traversing the filesystem tree (`fs` in function
29 30 /// and variable names) and dirstate tree at the same time. The core of this
30 31 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
31 32 /// and its use of `itertools::merge_join_by`. When reaching a path that only
32 33 /// exists in one of the two trees, depending on information requested by
33 34 /// `options` we may need to traverse the remaining subtree.
34 35 #[timed]
35 36 pub fn status<'tree, 'on_disk: 'tree>(
36 37 dmap: &'tree mut DirstateMap<'on_disk>,
37 38 matcher: &(dyn Matcher + Sync),
38 39 root_dir: PathBuf,
39 40 ignore_files: Vec<PathBuf>,
40 41 options: StatusOptions,
41 42 ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
42 43 let (ignore_fn, warnings): (IgnoreFnType, _) =
43 44 if options.list_ignored || options.list_unknown {
44 45 get_ignore_function(ignore_files, &root_dir)?
45 46 } else {
46 47 (Box::new(|&_| true), vec![])
47 48 };
48 49
49 50 let common = StatusCommon {
50 51 dmap,
51 52 options,
52 53 matcher,
53 54 ignore_fn,
54 55 outcome: Mutex::new(DirstateStatus::default()),
55 56 };
56 57 let is_at_repo_root = true;
57 58 let hg_path = HgPath::new("");
58 59 let has_ignored_ancestor = false;
59 60 common.traverse_fs_directory_and_dirstate(
60 61 has_ignored_ancestor,
61 62 dmap.root.as_ref(),
62 63 hg_path,
63 64 &root_dir,
64 65 is_at_repo_root,
65 66 )?;
66 67 Ok((common.outcome.into_inner().unwrap(), warnings))
67 68 }
68 69
69 70 /// Bag of random things needed by various parts of the algorithm. Reduces the
70 71 /// number of parameters passed to functions.
71 72 struct StatusCommon<'tree, 'a, 'on_disk: 'tree> {
72 73 dmap: &'tree DirstateMap<'on_disk>,
73 74 options: StatusOptions,
74 75 matcher: &'a (dyn Matcher + Sync),
75 76 ignore_fn: IgnoreFnType<'a>,
76 77 outcome: Mutex<DirstateStatus<'tree>>,
77 78 }
78 79
79 80 impl<'tree, 'a> StatusCommon<'tree, 'a, '_> {
80 81 fn read_dir(
81 82 &self,
82 83 hg_path: &HgPath,
83 84 fs_path: &Path,
84 85 is_at_repo_root: bool,
85 86 ) -> Result<Vec<DirEntry>, ()> {
86 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
87 DirEntry::read_dir(fs_path, is_at_repo_root)
88 .map_err(|error| self.io_error(error, hg_path))
89 }
90
91 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
87 92 let errno = error.raw_os_error().expect("expected real OS error");
88 93 self.outcome
89 94 .lock()
90 95 .unwrap()
91 96 .bad
92 97 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
93 })
94 98 }
95 99
96 100 fn traverse_fs_directory_and_dirstate(
97 101 &self,
98 102 has_ignored_ancestor: bool,
99 103 dirstate_nodes: ChildNodesRef<'tree, '_>,
100 104 directory_hg_path: &'tree HgPath,
101 105 directory_fs_path: &Path,
102 106 is_at_repo_root: bool,
103 107 ) -> Result<(), DirstateV2ParseError> {
108 if !self.options.list_unknown && !self.options.list_ignored {
109 // We only care about files in the dirstate, so we can skip listing
110 // filesystem directories entirely.
111 return dirstate_nodes
112 .par_iter()
113 .map(|dirstate_node| {
114 let fs_path = directory_fs_path.join(get_path_from_bytes(
115 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
116 ));
117 match std::fs::symlink_metadata(&fs_path) {
118 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
119 &fs_path,
120 &fs_metadata,
121 dirstate_node,
122 has_ignored_ancestor,
123 ),
124 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
125 self.traverse_dirstate_only(dirstate_node)
126 }
127 Err(error) => {
128 let hg_path =
129 dirstate_node.full_path(self.dmap.on_disk)?;
130 Ok(self.io_error(error, hg_path))
131 }
132 }
133 })
134 .collect();
135 }
136
104 137 let mut fs_entries = if let Ok(entries) = self.read_dir(
105 138 directory_hg_path,
106 139 directory_fs_path,
107 140 is_at_repo_root,
108 141 ) {
109 142 entries
110 143 } else {
111 144 return Ok(());
112 145 };
113 146
114 147 // `merge_join_by` requires both its input iterators to be sorted:
115 148
116 149 let dirstate_nodes = dirstate_nodes.sorted();
117 150 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
118 151 // https://github.com/rust-lang/rust/issues/34162
119 152 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
120 153
121 154 // Propagate here any error that would happen inside the comparison
122 155 // callback below
123 156 for dirstate_node in &dirstate_nodes {
124 157 dirstate_node.base_name(self.dmap.on_disk)?;
125 158 }
126 159 itertools::merge_join_by(
127 160 dirstate_nodes,
128 161 &fs_entries,
129 162 |dirstate_node, fs_entry| {
130 163 // This `unwrap` never panics because we already propagated
131 164 // those errors above
132 165 dirstate_node
133 166 .base_name(self.dmap.on_disk)
134 167 .unwrap()
135 168 .cmp(&fs_entry.base_name)
136 169 },
137 170 )
138 171 .par_bridge()
139 172 .map(|pair| {
140 173 use itertools::EitherOrBoth::*;
141 174 match pair {
142 175 Both(dirstate_node, fs_entry) => self
143 176 .traverse_fs_and_dirstate(
144 fs_entry,
177 &fs_entry.full_path,
178 &fs_entry.metadata,
145 179 dirstate_node,
146 180 has_ignored_ancestor,
147 181 ),
148 182 Left(dirstate_node) => {
149 183 self.traverse_dirstate_only(dirstate_node)
150 184 }
151 185 Right(fs_entry) => Ok(self.traverse_fs_only(
152 186 has_ignored_ancestor,
153 187 directory_hg_path,
154 188 fs_entry,
155 189 )),
156 190 }
157 191 })
158 192 .collect()
159 193 }
160 194
161 195 fn traverse_fs_and_dirstate(
162 196 &self,
163 fs_entry: &DirEntry,
197 fs_path: &Path,
198 fs_metadata: &std::fs::Metadata,
164 199 dirstate_node: NodeRef<'tree, '_>,
165 200 has_ignored_ancestor: bool,
166 201 ) -> Result<(), DirstateV2ParseError> {
167 202 let hg_path = dirstate_node.full_path(self.dmap.on_disk)?;
168 let file_type = fs_entry.metadata.file_type();
203 let file_type = fs_metadata.file_type();
169 204 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
170 205 if !file_or_symlink {
171 206 // If we previously had a file here, it was removed (with
172 207 // `hg rm` or similar) or deleted before it could be
173 208 // replaced by a directory or something else.
174 209 self.mark_removed_or_deleted_if_file(
175 210 hg_path,
176 211 dirstate_node.state()?,
177 212 );
178 213 }
179 214 if file_type.is_dir() {
180 215 if self.options.collect_traversed_dirs {
181 216 self.outcome.lock().unwrap().traversed.push(hg_path.into())
182 217 }
183 218 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
184 219 let is_at_repo_root = false;
185 220 self.traverse_fs_directory_and_dirstate(
186 221 is_ignored,
187 222 dirstate_node.children(self.dmap.on_disk)?,
188 223 hg_path,
189 &fs_entry.full_path,
224 fs_path,
190 225 is_at_repo_root,
191 226 )?
192 227 } else {
193 228 if file_or_symlink && self.matcher.matches(hg_path) {
194 229 let full_path = Cow::from(hg_path);
195 230 if let Some(state) = dirstate_node.state()? {
196 231 match state {
197 232 EntryState::Added => {
198 233 self.outcome.lock().unwrap().added.push(full_path)
199 234 }
200 235 EntryState::Removed => self
201 236 .outcome
202 237 .lock()
203 238 .unwrap()
204 239 .removed
205 240 .push(full_path),
206 241 EntryState::Merged => self
207 242 .outcome
208 243 .lock()
209 244 .unwrap()
210 245 .modified
211 246 .push(full_path),
212 EntryState::Normal => {
213 self.handle_normal_file(&dirstate_node, fs_entry)?
214 }
247 EntryState::Normal => self
248 .handle_normal_file(&dirstate_node, fs_metadata)?,
215 249 // This variant is not used in DirstateMap
216 250 // nodes
217 251 EntryState::Unknown => unreachable!(),
218 252 }
219 253 } else {
220 254 // `node.entry.is_none()` indicates a "directory"
221 255 // node, but the filesystem has a file
222 256 self.mark_unknown_or_ignored(
223 257 has_ignored_ancestor,
224 258 full_path,
225 259 )
226 260 }
227 261 }
228 262
229 263 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
230 264 {
231 265 self.traverse_dirstate_only(child_node)?
232 266 }
233 267 }
234 268 Ok(())
235 269 }
236 270
237 271 /// A file with `EntryState::Normal` in the dirstate was found in the
238 272 /// filesystem
239 273 fn handle_normal_file(
240 274 &self,
241 275 dirstate_node: &NodeRef<'tree, '_>,
242 fs_entry: &DirEntry,
276 fs_metadata: &std::fs::Metadata,
243 277 ) -> Result<(), DirstateV2ParseError> {
244 278 // Keep the low 31 bits
245 279 fn truncate_u64(value: u64) -> i32 {
246 280 (value & 0x7FFF_FFFF) as i32
247 281 }
248 282 fn truncate_i64(value: i64) -> i32 {
249 283 (value & 0x7FFF_FFFF) as i32
250 284 }
251 285
252 286 let entry = dirstate_node
253 287 .entry()?
254 288 .expect("handle_normal_file called with entry-less node");
255 289 let full_path = Cow::from(dirstate_node.full_path(self.dmap.on_disk)?);
256 let mode_changed = || {
257 self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
258 };
259 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
290 let mode_changed =
291 || self.options.check_exec && entry.mode_changed(fs_metadata);
292 let size_changed = entry.size != truncate_u64(fs_metadata.len());
260 293 if entry.size >= 0
261 294 && size_changed
262 && fs_entry.metadata.file_type().is_symlink()
295 && fs_metadata.file_type().is_symlink()
263 296 {
264 297 // issue6456: Size returned may be longer due to encryption
265 298 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
266 299 self.outcome.lock().unwrap().unsure.push(full_path)
267 300 } else if dirstate_node.has_copy_source()
268 301 || entry.is_from_other_parent()
269 302 || (entry.size >= 0 && (size_changed || mode_changed()))
270 303 {
271 304 self.outcome.lock().unwrap().modified.push(full_path)
272 305 } else {
273 let mtime = mtime_seconds(&fs_entry.metadata);
306 let mtime = mtime_seconds(fs_metadata);
274 307 if truncate_i64(mtime) != entry.mtime
275 308 || mtime == self.options.last_normal_time
276 309 {
277 310 self.outcome.lock().unwrap().unsure.push(full_path)
278 311 } else if self.options.list_clean {
279 312 self.outcome.lock().unwrap().clean.push(full_path)
280 313 }
281 314 }
282 315 Ok(())
283 316 }
284 317
285 318 /// A node in the dirstate tree has no corresponding filesystem entry
286 319 fn traverse_dirstate_only(
287 320 &self,
288 321 dirstate_node: NodeRef<'tree, '_>,
289 322 ) -> Result<(), DirstateV2ParseError> {
290 323 self.mark_removed_or_deleted_if_file(
291 324 dirstate_node.full_path(self.dmap.on_disk)?,
292 325 dirstate_node.state()?,
293 326 );
294 327 dirstate_node
295 328 .children(self.dmap.on_disk)?
296 329 .par_iter()
297 330 .map(|child_node| self.traverse_dirstate_only(child_node))
298 331 .collect()
299 332 }
300 333
301 334 /// A node in the dirstate tree has no corresponding *file* on the
302 335 /// filesystem
303 336 ///
304 337 /// Does nothing on a "directory" node
305 338 fn mark_removed_or_deleted_if_file(
306 339 &self,
307 340 hg_path: &'tree HgPath,
308 341 dirstate_node_state: Option<EntryState>,
309 342 ) {
310 343 if let Some(state) = dirstate_node_state {
311 344 if self.matcher.matches(hg_path) {
312 345 if let EntryState::Removed = state {
313 346 self.outcome.lock().unwrap().removed.push(hg_path.into())
314 347 } else {
315 348 self.outcome.lock().unwrap().deleted.push(hg_path.into())
316 349 }
317 350 }
318 351 }
319 352 }
320 353
321 354 /// Something in the filesystem has no corresponding dirstate node
322 355 fn traverse_fs_only(
323 356 &self,
324 357 has_ignored_ancestor: bool,
325 358 directory_hg_path: &HgPath,
326 359 fs_entry: &DirEntry,
327 360 ) {
328 361 let hg_path = directory_hg_path.join(&fs_entry.base_name);
329 362 let file_type = fs_entry.metadata.file_type();
330 363 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
331 364 if file_type.is_dir() {
332 365 let is_ignored =
333 366 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
334 367 let traverse_children = if is_ignored {
335 368 // Descendants of an ignored directory are all ignored
336 369 self.options.list_ignored
337 370 } else {
338 371 // Descendants of an unknown directory may be either unknown or
339 372 // ignored
340 373 self.options.list_unknown || self.options.list_ignored
341 374 };
342 375 if traverse_children {
343 376 let is_at_repo_root = false;
344 377 if let Ok(children_fs_entries) = self.read_dir(
345 378 &hg_path,
346 379 &fs_entry.full_path,
347 380 is_at_repo_root,
348 381 ) {
349 382 children_fs_entries.par_iter().for_each(|child_fs_entry| {
350 383 self.traverse_fs_only(
351 384 is_ignored,
352 385 &hg_path,
353 386 child_fs_entry,
354 387 )
355 388 })
356 389 }
357 390 }
358 391 if self.options.collect_traversed_dirs {
359 392 self.outcome.lock().unwrap().traversed.push(hg_path.into())
360 393 }
361 394 } else if file_or_symlink && self.matcher.matches(&hg_path) {
362 395 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
363 396 }
364 397 }
365 398
366 399 fn mark_unknown_or_ignored(
367 400 &self,
368 401 has_ignored_ancestor: bool,
369 402 hg_path: Cow<'tree, HgPath>,
370 403 ) {
371 404 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
372 405 if is_ignored {
373 406 if self.options.list_ignored {
374 407 self.outcome.lock().unwrap().ignored.push(hg_path)
375 408 }
376 409 } else {
377 410 if self.options.list_unknown {
378 411 self.outcome.lock().unwrap().unknown.push(hg_path)
379 412 }
380 413 }
381 414 }
382 415 }
383 416
384 417 #[cfg(unix)] // TODO
385 418 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
386 419 // Going through `Metadata::modified()` would be portable, but would take
387 420 // care to construct a `SystemTime` value with sub-second precision just
388 421 // for us to throw that away here.
389 422 use std::os::unix::fs::MetadataExt;
390 423 metadata.mtime()
391 424 }
392 425
393 426 struct DirEntry {
394 427 base_name: HgPathBuf,
395 428 full_path: PathBuf,
396 429 metadata: std::fs::Metadata,
397 430 }
398 431
399 432 impl DirEntry {
400 433 /// Returns **unsorted** entries in the given directory, with name and
401 434 /// metadata.
402 435 ///
403 436 /// If a `.hg` sub-directory is encountered:
404 437 ///
405 438 /// * At the repository root, ignore that sub-directory
406 439 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
407 440 /// list instead.
408 441 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
409 442 let mut results = Vec::new();
410 443 for entry in path.read_dir()? {
411 444 let entry = entry?;
412 445 let metadata = entry.metadata()?;
413 446 let name = get_bytes_from_os_string(entry.file_name());
414 447 // FIXME don't do this when cached
415 448 if name == b".hg" {
416 449 if is_at_repo_root {
417 450 // Skip the repo’s own .hg (might be a symlink)
418 451 continue;
419 452 } else if metadata.is_dir() {
420 453 // A .hg sub-directory at another location means a subrepo,
421 454 // skip it entirely.
422 455 return Ok(Vec::new());
423 456 }
424 457 }
425 458 results.push(DirEntry {
426 459 base_name: name.into(),
427 460 full_path: entry.path(),
428 461 metadata,
429 462 })
430 463 }
431 464 Ok(results)
432 465 }
433 466 }
General Comments 0
You need to be logged in to leave comments. Login now