Show More
@@ -1,408 +1,426 | |||
|
1 | 1 | use crate::dirstate::status::IgnoreFnType; |
|
2 | 2 | use crate::dirstate_tree::dirstate_map::ChildNodes; |
|
3 | 3 | use crate::dirstate_tree::dirstate_map::DirstateMap; |
|
4 | 4 | use crate::dirstate_tree::dirstate_map::Node; |
|
5 | 5 | use crate::matchers::get_ignore_function; |
|
6 | 6 | use crate::matchers::Matcher; |
|
7 | 7 | use crate::utils::files::get_bytes_from_os_string; |
|
8 | 8 | use crate::utils::hg_path::HgPath; |
|
9 | 9 | use crate::BadMatch; |
|
10 | 10 | use crate::DirstateStatus; |
|
11 | 11 | use crate::EntryState; |
|
12 | 12 | use crate::HgPathBuf; |
|
13 | 13 | use crate::PatternFileWarning; |
|
14 | 14 | use crate::StatusError; |
|
15 | 15 | use crate::StatusOptions; |
|
16 | use rayon::prelude::*; | |
|
16 | 17 | use std::borrow::Cow; |
|
17 | 18 | use std::io; |
|
18 | 19 | use std::path::Path; |
|
19 | 20 | use std::path::PathBuf; |
|
21 | use std::sync::Mutex; | |
|
20 | 22 | |
|
21 | 23 | /// Returns the status of the working directory compared to its parent |
|
22 | 24 | /// changeset. |
|
23 | 25 | /// |
|
24 | 26 | /// This algorithm is based on traversing the filesystem tree (`fs` in function |
|
25 | 27 | /// and variable names) and dirstate tree at the same time. The core of this |
|
26 | 28 | /// traversal is the recursive `traverse_fs_directory_and_dirstate` function |
|
27 | 29 | /// and its use of `itertools::merge_join_by`. When reaching a path that only |
|
28 | 30 | /// exists in one of the two trees, depending on information requested by |
|
29 | 31 | /// `options` we may need to traverse the remaining subtree. |
|
30 | 32 | pub fn status<'tree>( |
|
31 | 33 | dmap: &'tree mut DirstateMap, |
|
32 | 34 | matcher: &(dyn Matcher + Sync), |
|
33 | 35 | root_dir: PathBuf, |
|
34 | 36 | ignore_files: Vec<PathBuf>, |
|
35 | 37 | options: StatusOptions, |
|
36 | 38 | ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> { |
|
37 | 39 | let (ignore_fn, warnings): (IgnoreFnType, _) = |
|
38 | 40 | if options.list_ignored || options.list_unknown { |
|
39 | 41 | get_ignore_function(ignore_files, &root_dir)? |
|
40 | 42 | } else { |
|
41 | 43 | (Box::new(|&_| true), vec![]) |
|
42 | 44 | }; |
|
43 | 45 | |
|
44 |
let |
|
|
46 | let common = StatusCommon { | |
|
45 | 47 | options, |
|
46 | 48 | matcher, |
|
47 | 49 | ignore_fn, |
|
48 | outcome: DirstateStatus::default(), | |
|
50 | outcome: Mutex::new(DirstateStatus::default()), | |
|
49 | 51 | }; |
|
50 | 52 | let is_at_repo_root = true; |
|
51 | 53 | let hg_path = HgPath::new(""); |
|
52 | 54 | let has_ignored_ancestor = false; |
|
53 | 55 | common.traverse_fs_directory_and_dirstate( |
|
54 | 56 | has_ignored_ancestor, |
|
55 | 57 | &mut dmap.root, |
|
56 | 58 | hg_path, |
|
57 | 59 | &root_dir, |
|
58 | 60 | is_at_repo_root, |
|
59 | 61 | ); |
|
60 | Ok((common.outcome, warnings)) | |
|
62 | Ok((common.outcome.into_inner().unwrap(), warnings)) | |
|
61 | 63 | } |
|
62 | 64 | |
|
63 | 65 | /// Bag of random things needed by various parts of the algorithm. Reduces the |
|
64 | 66 | /// number of parameters passed to functions. |
|
65 | 67 | struct StatusCommon<'tree, 'a> { |
|
66 | 68 | options: StatusOptions, |
|
67 | 69 | matcher: &'a (dyn Matcher + Sync), |
|
68 | 70 | ignore_fn: IgnoreFnType<'a>, |
|
69 | outcome: DirstateStatus<'tree>, | |
|
71 | outcome: Mutex<DirstateStatus<'tree>>, | |
|
70 | 72 | } |
|
71 | 73 | |
|
72 | 74 | impl<'tree, 'a> StatusCommon<'tree, 'a> { |
|
73 | 75 | fn read_dir( |
|
74 |
& |
|
|
76 | &self, | |
|
75 | 77 | hg_path: &HgPath, |
|
76 | 78 | fs_path: &Path, |
|
77 | 79 | is_at_repo_root: bool, |
|
78 | 80 | ) -> Result<Vec<DirEntry>, ()> { |
|
79 | 81 | DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| { |
|
80 | 82 | let errno = error.raw_os_error().expect("expected real OS error"); |
|
81 | 83 | self.outcome |
|
84 | .lock() | |
|
85 | .unwrap() | |
|
82 | 86 | .bad |
|
83 | 87 | .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) |
|
84 | 88 | }) |
|
85 | 89 | } |
|
86 | 90 | |
|
87 | 91 | fn traverse_fs_directory_and_dirstate( |
|
88 |
& |
|
|
92 | &self, | |
|
89 | 93 | has_ignored_ancestor: bool, |
|
90 | 94 | dirstate_nodes: &'tree mut ChildNodes, |
|
91 | 95 | directory_hg_path: &'tree HgPath, |
|
92 | 96 | directory_fs_path: &Path, |
|
93 | 97 | is_at_repo_root: bool, |
|
94 | 98 | ) { |
|
95 | 99 | let mut fs_entries = if let Ok(entries) = self.read_dir( |
|
96 | 100 | directory_hg_path, |
|
97 | 101 | directory_fs_path, |
|
98 | 102 | is_at_repo_root, |
|
99 | 103 | ) { |
|
100 | 104 | entries |
|
101 | 105 | } else { |
|
102 | 106 | return; |
|
103 | 107 | }; |
|
104 | 108 | |
|
105 | 109 | // `merge_join_by` requires both its input iterators to be sorted: |
|
106 | 110 | |
|
111 | // | |
|
107 | 112 | // * `BTreeMap` iterates according to keys’ ordering by definition |
|
108 | 113 | |
|
109 | 114 | // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: |
|
110 | 115 | // https://github.com/rust-lang/rust/issues/34162 |
|
111 | 116 | fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name)); |
|
112 | 117 | |
|
113 |
|
|
|
118 | itertools::merge_join_by( | |
|
114 | 119 | dirstate_nodes, |
|
115 | 120 | &fs_entries, |
|
116 | 121 | |(full_path, _node), fs_entry| { |
|
117 | 122 | full_path.base_name().cmp(&fs_entry.base_name) |
|
118 | 123 | }, |
|
119 |
) |
|
|
124 | ) | |
|
125 | .par_bridge() | |
|
126 | .for_each(|pair| { | |
|
120 | 127 | use itertools::EitherOrBoth::*; |
|
121 | 128 | match pair { |
|
122 | 129 | Both((hg_path, dirstate_node), fs_entry) => { |
|
123 | 130 | self.traverse_fs_and_dirstate( |
|
124 | 131 | fs_entry, |
|
125 | 132 | hg_path.full_path(), |
|
126 | 133 | dirstate_node, |
|
127 | 134 | has_ignored_ancestor, |
|
128 | 135 | ); |
|
129 | 136 | } |
|
130 | 137 | Left((hg_path, dirstate_node)) => self.traverse_dirstate_only( |
|
131 | 138 | hg_path.full_path(), |
|
132 | 139 | dirstate_node, |
|
133 | 140 | ), |
|
134 | 141 | Right(fs_entry) => self.traverse_fs_only( |
|
135 | 142 | has_ignored_ancestor, |
|
136 | 143 | directory_hg_path, |
|
137 | 144 | fs_entry, |
|
138 | 145 | ), |
|
139 | 146 | } |
|
140 | } | |
|
147 | }) | |
|
141 | 148 | } |
|
142 | 149 | |
|
143 | 150 | fn traverse_fs_and_dirstate( |
|
144 |
& |
|
|
151 | &self, | |
|
145 | 152 | fs_entry: &DirEntry, |
|
146 | 153 | hg_path: &'tree HgPath, |
|
147 | 154 | dirstate_node: &'tree mut Node, |
|
148 | 155 | has_ignored_ancestor: bool, |
|
149 | 156 | ) { |
|
150 | 157 | let file_type = fs_entry.metadata.file_type(); |
|
151 | 158 | let file_or_symlink = file_type.is_file() || file_type.is_symlink(); |
|
152 | 159 | if !file_or_symlink { |
|
153 | 160 | // If we previously had a file here, it was removed (with |
|
154 | 161 | // `hg rm` or similar) or deleted before it could be |
|
155 | 162 | // replaced by a directory or something else. |
|
156 | 163 | self.mark_removed_or_deleted_if_file( |
|
157 | 164 | hg_path, |
|
158 | 165 | dirstate_node.state(), |
|
159 | 166 | ); |
|
160 | 167 | } |
|
161 | 168 | if file_type.is_dir() { |
|
162 | 169 | if self.options.collect_traversed_dirs { |
|
163 | self.outcome.traversed.push(hg_path.into()) | |
|
170 | self.outcome.lock().unwrap().traversed.push(hg_path.into()) | |
|
164 | 171 | } |
|
165 | 172 | let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); |
|
166 | 173 | let is_at_repo_root = false; |
|
167 | 174 | self.traverse_fs_directory_and_dirstate( |
|
168 | 175 | is_ignored, |
|
169 | 176 | &mut dirstate_node.children, |
|
170 | 177 | hg_path, |
|
171 | 178 | &fs_entry.full_path, |
|
172 | 179 | is_at_repo_root, |
|
173 | 180 | ); |
|
174 | 181 | } else { |
|
175 | 182 | if file_or_symlink && self.matcher.matches(hg_path) { |
|
176 | 183 | let full_path = Cow::from(hg_path); |
|
177 | 184 | if let Some(entry) = &dirstate_node.entry { |
|
178 | 185 | match entry.state { |
|
179 | 186 | EntryState::Added => { |
|
180 | self.outcome.added.push(full_path) | |
|
187 | self.outcome.lock().unwrap().added.push(full_path) | |
|
181 | 188 | } |
|
182 |
EntryState::Removed => |
|
|
183 |
|
|
|
184 |
|
|
|
185 |
|
|
|
186 |
|
|
|
187 |
|
|
|
189 | EntryState::Removed => self | |
|
190 | .outcome | |
|
191 | .lock() | |
|
192 | .unwrap() | |
|
193 | .removed | |
|
194 | .push(full_path), | |
|
195 | EntryState::Merged => self | |
|
196 | .outcome | |
|
197 | .lock() | |
|
198 | .unwrap() | |
|
199 | .modified | |
|
200 | .push(full_path), | |
|
188 | 201 | EntryState::Normal => { |
|
189 | 202 | self.handle_normal_file( |
|
190 | 203 | full_path, |
|
191 | 204 | dirstate_node, |
|
192 | 205 | entry, |
|
193 | 206 | fs_entry, |
|
194 | 207 | ); |
|
195 | 208 | } |
|
196 | 209 | // This variant is not used in DirstateMap |
|
197 | 210 | // nodes |
|
198 | 211 | EntryState::Unknown => unreachable!(), |
|
199 | 212 | } |
|
200 | 213 | } else { |
|
201 | 214 | // `node.entry.is_none()` indicates a "directory" |
|
202 | 215 | // node, but the filesystem has a file |
|
203 | 216 | self.mark_unknown_or_ignored( |
|
204 | 217 | has_ignored_ancestor, |
|
205 | 218 | full_path, |
|
206 | 219 | ) |
|
207 | 220 | } |
|
208 | 221 | } |
|
209 | 222 | |
|
210 | 223 | for (child_hg_path, child_node) in &mut dirstate_node.children { |
|
211 | 224 | self.traverse_dirstate_only( |
|
212 | 225 | child_hg_path.full_path(), |
|
213 | 226 | child_node, |
|
214 | 227 | ) |
|
215 | 228 | } |
|
216 | 229 | } |
|
217 | 230 | } |
|
218 | 231 | |
|
219 | 232 | /// A file with `EntryState::Normal` in the dirstate was found in the |
|
220 | 233 | /// filesystem |
|
221 | 234 | fn handle_normal_file( |
|
222 |
& |
|
|
235 | &self, | |
|
223 | 236 | full_path: Cow<'tree, HgPath>, |
|
224 | 237 | dirstate_node: &Node, |
|
225 | 238 | entry: &crate::DirstateEntry, |
|
226 | 239 | fs_entry: &DirEntry, |
|
227 | 240 | ) { |
|
228 | 241 | // Keep the low 31 bits |
|
229 | 242 | fn truncate_u64(value: u64) -> i32 { |
|
230 | 243 | (value & 0x7FFF_FFFF) as i32 |
|
231 | 244 | } |
|
232 | 245 | fn truncate_i64(value: i64) -> i32 { |
|
233 | 246 | (value & 0x7FFF_FFFF) as i32 |
|
234 | 247 | } |
|
235 | 248 | |
|
236 | 249 | let mode_changed = || { |
|
237 | 250 | self.options.check_exec && entry.mode_changed(&fs_entry.metadata) |
|
238 | 251 | }; |
|
239 | 252 | let size_changed = entry.size != truncate_u64(fs_entry.metadata.len()); |
|
240 | 253 | if entry.size >= 0 |
|
241 | 254 | && size_changed |
|
242 | 255 | && fs_entry.metadata.file_type().is_symlink() |
|
243 | 256 | { |
|
244 | 257 | // issue6456: Size returned may be longer due to encryption |
|
245 | 258 | // on EXT-4 fscrypt. TODO maybe only do it on EXT4? |
|
246 | self.outcome.unsure.push(full_path) | |
|
259 | self.outcome.lock().unwrap().unsure.push(full_path) | |
|
247 | 260 | } else if dirstate_node.copy_source.is_some() |
|
248 | 261 | || entry.is_from_other_parent() |
|
249 | 262 | || (entry.size >= 0 && (size_changed || mode_changed())) |
|
250 | 263 | { |
|
251 | self.outcome.modified.push(full_path) | |
|
264 | self.outcome.lock().unwrap().modified.push(full_path) | |
|
252 | 265 | } else { |
|
253 | 266 | let mtime = mtime_seconds(&fs_entry.metadata); |
|
254 | 267 | if truncate_i64(mtime) != entry.mtime |
|
255 | 268 | || mtime == self.options.last_normal_time |
|
256 | 269 | { |
|
257 | self.outcome.unsure.push(full_path) | |
|
270 | self.outcome.lock().unwrap().unsure.push(full_path) | |
|
258 | 271 | } else if self.options.list_clean { |
|
259 | self.outcome.clean.push(full_path) | |
|
272 | self.outcome.lock().unwrap().clean.push(full_path) | |
|
260 | 273 | } |
|
261 | 274 | } |
|
262 | 275 | } |
|
263 | 276 | |
|
264 | 277 | /// A node in the dirstate tree has no corresponding filesystem entry |
|
265 | 278 | fn traverse_dirstate_only( |
|
266 |
& |
|
|
279 | &self, | |
|
267 | 280 | hg_path: &'tree HgPath, |
|
268 | 281 | dirstate_node: &'tree mut Node, |
|
269 | 282 | ) { |
|
270 | 283 | self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state()); |
|
271 | for (child_hg_path, child_node) in &mut dirstate_node.children { | |
|
272 |
|
|
|
273 | } | |
|
284 | dirstate_node.children.par_iter_mut().for_each( | |
|
285 | |(child_hg_path, child_node)| { | |
|
286 | self.traverse_dirstate_only( | |
|
287 | child_hg_path.full_path(), | |
|
288 | child_node, | |
|
289 | ) | |
|
290 | }, | |
|
291 | ) | |
|
274 | 292 | } |
|
275 | 293 | |
|
276 | 294 | /// A node in the dirstate tree has no corresponding *file* on the |
|
277 | 295 | /// filesystem |
|
278 | 296 | /// |
|
279 | 297 | /// Does nothing on a "directory" node |
|
280 | 298 | fn mark_removed_or_deleted_if_file( |
|
281 |
& |
|
|
299 | &self, | |
|
282 | 300 | hg_path: &'tree HgPath, |
|
283 | 301 | dirstate_node_state: Option<EntryState>, |
|
284 | 302 | ) { |
|
285 | 303 | if let Some(state) = dirstate_node_state { |
|
286 | 304 | if self.matcher.matches(hg_path) { |
|
287 | 305 | if let EntryState::Removed = state { |
|
288 | self.outcome.removed.push(hg_path.into()) | |
|
306 | self.outcome.lock().unwrap().removed.push(hg_path.into()) | |
|
289 | 307 | } else { |
|
290 | self.outcome.deleted.push(hg_path.into()) | |
|
308 | self.outcome.lock().unwrap().deleted.push(hg_path.into()) | |
|
291 | 309 | } |
|
292 | 310 | } |
|
293 | 311 | } |
|
294 | 312 | } |
|
295 | 313 | |
|
296 | 314 | /// Something in the filesystem has no corresponding dirstate node |
|
297 | 315 | fn traverse_fs_only( |
|
298 |
& |
|
|
316 | &self, | |
|
299 | 317 | has_ignored_ancestor: bool, |
|
300 | 318 | directory_hg_path: &HgPath, |
|
301 | 319 | fs_entry: &DirEntry, |
|
302 | 320 | ) { |
|
303 | 321 | let hg_path = directory_hg_path.join(&fs_entry.base_name); |
|
304 | 322 | let file_type = fs_entry.metadata.file_type(); |
|
305 | 323 | let file_or_symlink = file_type.is_file() || file_type.is_symlink(); |
|
306 | 324 | if file_type.is_dir() { |
|
307 | 325 | let is_ignored = |
|
308 | 326 | has_ignored_ancestor || (self.ignore_fn)(&hg_path); |
|
309 | 327 | let traverse_children = if is_ignored { |
|
310 | 328 | // Descendants of an ignored directory are all ignored |
|
311 | 329 | self.options.list_ignored |
|
312 | 330 | } else { |
|
313 | 331 | // Descendants of an unknown directory may be either unknown or |
|
314 | 332 | // ignored |
|
315 | 333 | self.options.list_unknown || self.options.list_ignored |
|
316 | 334 | }; |
|
317 | 335 | if traverse_children { |
|
318 | 336 | let is_at_repo_root = false; |
|
319 | 337 | if let Ok(children_fs_entries) = self.read_dir( |
|
320 | 338 | &hg_path, |
|
321 | 339 | &fs_entry.full_path, |
|
322 | 340 | is_at_repo_root, |
|
323 | 341 | ) { |
|
324 | for child_fs_entry in children_fs_entries { | |
|
342 | children_fs_entries.par_iter().for_each(|child_fs_entry| { | |
|
325 | 343 | self.traverse_fs_only( |
|
326 | 344 | is_ignored, |
|
327 | 345 | &hg_path, |
|
328 |
|
|
|
346 | child_fs_entry, | |
|
329 | 347 | ) |
|
330 | } | |
|
348 | }) | |
|
331 | 349 | } |
|
332 | 350 | } |
|
333 | 351 | if self.options.collect_traversed_dirs { |
|
334 | self.outcome.traversed.push(hg_path.into()) | |
|
352 | self.outcome.lock().unwrap().traversed.push(hg_path.into()) | |
|
335 | 353 | } |
|
336 | 354 | } else if file_or_symlink && self.matcher.matches(&hg_path) { |
|
337 | 355 | self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into()) |
|
338 | 356 | } |
|
339 | 357 | } |
|
340 | 358 | |
|
341 | 359 | fn mark_unknown_or_ignored( |
|
342 |
& |
|
|
360 | &self, | |
|
343 | 361 | has_ignored_ancestor: bool, |
|
344 | 362 | hg_path: Cow<'tree, HgPath>, |
|
345 | 363 | ) { |
|
346 | 364 | let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path); |
|
347 | 365 | if is_ignored { |
|
348 | 366 | if self.options.list_ignored { |
|
349 | self.outcome.ignored.push(hg_path) | |
|
367 | self.outcome.lock().unwrap().ignored.push(hg_path) | |
|
350 | 368 | } |
|
351 | 369 | } else { |
|
352 | 370 | if self.options.list_unknown { |
|
353 | self.outcome.unknown.push(hg_path) | |
|
371 | self.outcome.lock().unwrap().unknown.push(hg_path) | |
|
354 | 372 | } |
|
355 | 373 | } |
|
356 | 374 | } |
|
357 | 375 | } |
|
358 | 376 | |
|
359 | 377 | #[cfg(unix)] // TODO |
|
360 | 378 | fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 { |
|
361 | 379 | // Going through `Metadata::modified()` would be portable, but would take |
|
362 | 380 | // care to construct a `SystemTime` value with sub-second precision just |
|
363 | 381 | // for us to throw that away here. |
|
364 | 382 | use std::os::unix::fs::MetadataExt; |
|
365 | 383 | metadata.mtime() |
|
366 | 384 | } |
|
367 | 385 | |
|
368 | 386 | struct DirEntry { |
|
369 | 387 | base_name: HgPathBuf, |
|
370 | 388 | full_path: PathBuf, |
|
371 | 389 | metadata: std::fs::Metadata, |
|
372 | 390 | } |
|
373 | 391 | |
|
374 | 392 | impl DirEntry { |
|
375 | 393 | /// Returns **unsorted** entries in the given directory, with name and |
|
376 | 394 | /// metadata. |
|
377 | 395 | /// |
|
378 | 396 | /// If a `.hg` sub-directory is encountered: |
|
379 | 397 | /// |
|
380 | 398 | /// * At the repository root, ignore that sub-directory |
|
381 | 399 | /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty |
|
382 | 400 | /// list instead. |
|
383 | 401 | fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> { |
|
384 | 402 | let mut results = Vec::new(); |
|
385 | 403 | for entry in path.read_dir()? { |
|
386 | 404 | let entry = entry?; |
|
387 | 405 | let metadata = entry.metadata()?; |
|
388 | 406 | let name = get_bytes_from_os_string(entry.file_name()); |
|
389 | 407 | // FIXME don't do this when cached |
|
390 | 408 | if name == b".hg" { |
|
391 | 409 | if is_at_repo_root { |
|
392 | 410 | // Skip the repo’s own .hg (might be a symlink) |
|
393 | 411 | continue; |
|
394 | 412 | } else if metadata.is_dir() { |
|
395 | 413 | // A .hg sub-directory at another location means a subrepo, |
|
396 | 414 | // skip it entirely. |
|
397 | 415 | return Ok(Vec::new()); |
|
398 | 416 | } |
|
399 | 417 | } |
|
400 | 418 | results.push(DirEntry { |
|
401 | 419 | base_name: name.into(), |
|
402 | 420 | full_path: entry.path(), |
|
403 | 421 | metadata, |
|
404 | 422 | }) |
|
405 | 423 | } |
|
406 | 424 | Ok(results) |
|
407 | 425 | } |
|
408 | 426 | } |
General Comments 0
You need to be logged in to leave comments.
Login now