##// END OF EJS Templates
dirstate-tree: Paralellize the status algorithm with Rayon...
Simon Sapin -
r47887:60d852ae default
parent child Browse files
Show More
@@ -1,408 +1,426
1 1 use crate::dirstate::status::IgnoreFnType;
2 2 use crate::dirstate_tree::dirstate_map::ChildNodes;
3 3 use crate::dirstate_tree::dirstate_map::DirstateMap;
4 4 use crate::dirstate_tree::dirstate_map::Node;
5 5 use crate::matchers::get_ignore_function;
6 6 use crate::matchers::Matcher;
7 7 use crate::utils::files::get_bytes_from_os_string;
8 8 use crate::utils::hg_path::HgPath;
9 9 use crate::BadMatch;
10 10 use crate::DirstateStatus;
11 11 use crate::EntryState;
12 12 use crate::HgPathBuf;
13 13 use crate::PatternFileWarning;
14 14 use crate::StatusError;
15 15 use crate::StatusOptions;
16 use rayon::prelude::*;
16 17 use std::borrow::Cow;
17 18 use std::io;
18 19 use std::path::Path;
19 20 use std::path::PathBuf;
21 use std::sync::Mutex;
20 22
21 23 /// Returns the status of the working directory compared to its parent
22 24 /// changeset.
23 25 ///
24 26 /// This algorithm is based on traversing the filesystem tree (`fs` in function
25 27 /// and variable names) and dirstate tree at the same time. The core of this
26 28 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
27 29 /// and its use of `itertools::merge_join_by`. When reaching a path that only
28 30 /// exists in one of the two trees, depending on information requested by
29 31 /// `options` we may need to traverse the remaining subtree.
30 32 pub fn status<'tree>(
31 33 dmap: &'tree mut DirstateMap,
32 34 matcher: &(dyn Matcher + Sync),
33 35 root_dir: PathBuf,
34 36 ignore_files: Vec<PathBuf>,
35 37 options: StatusOptions,
36 38 ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
37 39 let (ignore_fn, warnings): (IgnoreFnType, _) =
38 40 if options.list_ignored || options.list_unknown {
39 41 get_ignore_function(ignore_files, &root_dir)?
40 42 } else {
41 43 (Box::new(|&_| true), vec![])
42 44 };
43 45
44 let mut common = StatusCommon {
46 let common = StatusCommon {
45 47 options,
46 48 matcher,
47 49 ignore_fn,
48 outcome: DirstateStatus::default(),
50 outcome: Mutex::new(DirstateStatus::default()),
49 51 };
50 52 let is_at_repo_root = true;
51 53 let hg_path = HgPath::new("");
52 54 let has_ignored_ancestor = false;
53 55 common.traverse_fs_directory_and_dirstate(
54 56 has_ignored_ancestor,
55 57 &mut dmap.root,
56 58 hg_path,
57 59 &root_dir,
58 60 is_at_repo_root,
59 61 );
60 Ok((common.outcome, warnings))
62 Ok((common.outcome.into_inner().unwrap(), warnings))
61 63 }
62 64
63 65 /// Bag of random things needed by various parts of the algorithm. Reduces the
64 66 /// number of parameters passed to functions.
65 67 struct StatusCommon<'tree, 'a> {
66 68 options: StatusOptions,
67 69 matcher: &'a (dyn Matcher + Sync),
68 70 ignore_fn: IgnoreFnType<'a>,
69 outcome: DirstateStatus<'tree>,
71 outcome: Mutex<DirstateStatus<'tree>>,
70 72 }
71 73
72 74 impl<'tree, 'a> StatusCommon<'tree, 'a> {
73 75 fn read_dir(
74 &mut self,
76 &self,
75 77 hg_path: &HgPath,
76 78 fs_path: &Path,
77 79 is_at_repo_root: bool,
78 80 ) -> Result<Vec<DirEntry>, ()> {
79 81 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
80 82 let errno = error.raw_os_error().expect("expected real OS error");
81 83 self.outcome
84 .lock()
85 .unwrap()
82 86 .bad
83 87 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
84 88 })
85 89 }
86 90
87 91 fn traverse_fs_directory_and_dirstate(
88 &mut self,
92 &self,
89 93 has_ignored_ancestor: bool,
90 94 dirstate_nodes: &'tree mut ChildNodes,
91 95 directory_hg_path: &'tree HgPath,
92 96 directory_fs_path: &Path,
93 97 is_at_repo_root: bool,
94 98 ) {
95 99 let mut fs_entries = if let Ok(entries) = self.read_dir(
96 100 directory_hg_path,
97 101 directory_fs_path,
98 102 is_at_repo_root,
99 103 ) {
100 104 entries
101 105 } else {
102 106 return;
103 107 };
104 108
105 109 // `merge_join_by` requires both its input iterators to be sorted:
106 110
111 //
107 112 // * `BTreeMap` iterates according to keys’ ordering by definition
108 113
109 114 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
110 115 // https://github.com/rust-lang/rust/issues/34162
111 116 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
112 117
113 for pair in itertools::merge_join_by(
118 itertools::merge_join_by(
114 119 dirstate_nodes,
115 120 &fs_entries,
116 121 |(full_path, _node), fs_entry| {
117 122 full_path.base_name().cmp(&fs_entry.base_name)
118 123 },
119 ) {
124 )
125 .par_bridge()
126 .for_each(|pair| {
120 127 use itertools::EitherOrBoth::*;
121 128 match pair {
122 129 Both((hg_path, dirstate_node), fs_entry) => {
123 130 self.traverse_fs_and_dirstate(
124 131 fs_entry,
125 132 hg_path.full_path(),
126 133 dirstate_node,
127 134 has_ignored_ancestor,
128 135 );
129 136 }
130 137 Left((hg_path, dirstate_node)) => self.traverse_dirstate_only(
131 138 hg_path.full_path(),
132 139 dirstate_node,
133 140 ),
134 141 Right(fs_entry) => self.traverse_fs_only(
135 142 has_ignored_ancestor,
136 143 directory_hg_path,
137 144 fs_entry,
138 145 ),
139 146 }
140 }
147 })
141 148 }
142 149
143 150 fn traverse_fs_and_dirstate(
144 &mut self,
151 &self,
145 152 fs_entry: &DirEntry,
146 153 hg_path: &'tree HgPath,
147 154 dirstate_node: &'tree mut Node,
148 155 has_ignored_ancestor: bool,
149 156 ) {
150 157 let file_type = fs_entry.metadata.file_type();
151 158 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
152 159 if !file_or_symlink {
153 160 // If we previously had a file here, it was removed (with
154 161 // `hg rm` or similar) or deleted before it could be
155 162 // replaced by a directory or something else.
156 163 self.mark_removed_or_deleted_if_file(
157 164 hg_path,
158 165 dirstate_node.state(),
159 166 );
160 167 }
161 168 if file_type.is_dir() {
162 169 if self.options.collect_traversed_dirs {
163 self.outcome.traversed.push(hg_path.into())
170 self.outcome.lock().unwrap().traversed.push(hg_path.into())
164 171 }
165 172 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
166 173 let is_at_repo_root = false;
167 174 self.traverse_fs_directory_and_dirstate(
168 175 is_ignored,
169 176 &mut dirstate_node.children,
170 177 hg_path,
171 178 &fs_entry.full_path,
172 179 is_at_repo_root,
173 180 );
174 181 } else {
175 182 if file_or_symlink && self.matcher.matches(hg_path) {
176 183 let full_path = Cow::from(hg_path);
177 184 if let Some(entry) = &dirstate_node.entry {
178 185 match entry.state {
179 186 EntryState::Added => {
180 self.outcome.added.push(full_path)
187 self.outcome.lock().unwrap().added.push(full_path)
181 188 }
182 EntryState::Removed => {
183 self.outcome.removed.push(full_path)
184 }
185 EntryState::Merged => {
186 self.outcome.modified.push(full_path)
187 }
189 EntryState::Removed => self
190 .outcome
191 .lock()
192 .unwrap()
193 .removed
194 .push(full_path),
195 EntryState::Merged => self
196 .outcome
197 .lock()
198 .unwrap()
199 .modified
200 .push(full_path),
188 201 EntryState::Normal => {
189 202 self.handle_normal_file(
190 203 full_path,
191 204 dirstate_node,
192 205 entry,
193 206 fs_entry,
194 207 );
195 208 }
196 209 // This variant is not used in DirstateMap
197 210 // nodes
198 211 EntryState::Unknown => unreachable!(),
199 212 }
200 213 } else {
201 214 // `node.entry.is_none()` indicates a "directory"
202 215 // node, but the filesystem has a file
203 216 self.mark_unknown_or_ignored(
204 217 has_ignored_ancestor,
205 218 full_path,
206 219 )
207 220 }
208 221 }
209 222
210 223 for (child_hg_path, child_node) in &mut dirstate_node.children {
211 224 self.traverse_dirstate_only(
212 225 child_hg_path.full_path(),
213 226 child_node,
214 227 )
215 228 }
216 229 }
217 230 }
218 231
219 232 /// A file with `EntryState::Normal` in the dirstate was found in the
220 233 /// filesystem
221 234 fn handle_normal_file(
222 &mut self,
235 &self,
223 236 full_path: Cow<'tree, HgPath>,
224 237 dirstate_node: &Node,
225 238 entry: &crate::DirstateEntry,
226 239 fs_entry: &DirEntry,
227 240 ) {
228 241 // Keep the low 31 bits
229 242 fn truncate_u64(value: u64) -> i32 {
230 243 (value & 0x7FFF_FFFF) as i32
231 244 }
232 245 fn truncate_i64(value: i64) -> i32 {
233 246 (value & 0x7FFF_FFFF) as i32
234 247 }
235 248
236 249 let mode_changed = || {
237 250 self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
238 251 };
239 252 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
240 253 if entry.size >= 0
241 254 && size_changed
242 255 && fs_entry.metadata.file_type().is_symlink()
243 256 {
244 257 // issue6456: Size returned may be longer due to encryption
245 258 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
246 self.outcome.unsure.push(full_path)
259 self.outcome.lock().unwrap().unsure.push(full_path)
247 260 } else if dirstate_node.copy_source.is_some()
248 261 || entry.is_from_other_parent()
249 262 || (entry.size >= 0 && (size_changed || mode_changed()))
250 263 {
251 self.outcome.modified.push(full_path)
264 self.outcome.lock().unwrap().modified.push(full_path)
252 265 } else {
253 266 let mtime = mtime_seconds(&fs_entry.metadata);
254 267 if truncate_i64(mtime) != entry.mtime
255 268 || mtime == self.options.last_normal_time
256 269 {
257 self.outcome.unsure.push(full_path)
270 self.outcome.lock().unwrap().unsure.push(full_path)
258 271 } else if self.options.list_clean {
259 self.outcome.clean.push(full_path)
272 self.outcome.lock().unwrap().clean.push(full_path)
260 273 }
261 274 }
262 275 }
263 276
264 277 /// A node in the dirstate tree has no corresponding filesystem entry
265 278 fn traverse_dirstate_only(
266 &mut self,
279 &self,
267 280 hg_path: &'tree HgPath,
268 281 dirstate_node: &'tree mut Node,
269 282 ) {
270 283 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state());
271 for (child_hg_path, child_node) in &mut dirstate_node.children {
272 self.traverse_dirstate_only(child_hg_path.full_path(), child_node)
273 }
284 dirstate_node.children.par_iter_mut().for_each(
285 |(child_hg_path, child_node)| {
286 self.traverse_dirstate_only(
287 child_hg_path.full_path(),
288 child_node,
289 )
290 },
291 )
274 292 }
275 293
276 294 /// A node in the dirstate tree has no corresponding *file* on the
277 295 /// filesystem
278 296 ///
279 297 /// Does nothing on a "directory" node
280 298 fn mark_removed_or_deleted_if_file(
281 &mut self,
299 &self,
282 300 hg_path: &'tree HgPath,
283 301 dirstate_node_state: Option<EntryState>,
284 302 ) {
285 303 if let Some(state) = dirstate_node_state {
286 304 if self.matcher.matches(hg_path) {
287 305 if let EntryState::Removed = state {
288 self.outcome.removed.push(hg_path.into())
306 self.outcome.lock().unwrap().removed.push(hg_path.into())
289 307 } else {
290 self.outcome.deleted.push(hg_path.into())
308 self.outcome.lock().unwrap().deleted.push(hg_path.into())
291 309 }
292 310 }
293 311 }
294 312 }
295 313
296 314 /// Something in the filesystem has no corresponding dirstate node
297 315 fn traverse_fs_only(
298 &mut self,
316 &self,
299 317 has_ignored_ancestor: bool,
300 318 directory_hg_path: &HgPath,
301 319 fs_entry: &DirEntry,
302 320 ) {
303 321 let hg_path = directory_hg_path.join(&fs_entry.base_name);
304 322 let file_type = fs_entry.metadata.file_type();
305 323 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
306 324 if file_type.is_dir() {
307 325 let is_ignored =
308 326 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
309 327 let traverse_children = if is_ignored {
310 328 // Descendants of an ignored directory are all ignored
311 329 self.options.list_ignored
312 330 } else {
313 331 // Descendants of an unknown directory may be either unknown or
314 332 // ignored
315 333 self.options.list_unknown || self.options.list_ignored
316 334 };
317 335 if traverse_children {
318 336 let is_at_repo_root = false;
319 337 if let Ok(children_fs_entries) = self.read_dir(
320 338 &hg_path,
321 339 &fs_entry.full_path,
322 340 is_at_repo_root,
323 341 ) {
324 for child_fs_entry in children_fs_entries {
342 children_fs_entries.par_iter().for_each(|child_fs_entry| {
325 343 self.traverse_fs_only(
326 344 is_ignored,
327 345 &hg_path,
328 &child_fs_entry,
346 child_fs_entry,
329 347 )
330 }
348 })
331 349 }
332 350 }
333 351 if self.options.collect_traversed_dirs {
334 self.outcome.traversed.push(hg_path.into())
352 self.outcome.lock().unwrap().traversed.push(hg_path.into())
335 353 }
336 354 } else if file_or_symlink && self.matcher.matches(&hg_path) {
337 355 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
338 356 }
339 357 }
340 358
341 359 fn mark_unknown_or_ignored(
342 &mut self,
360 &self,
343 361 has_ignored_ancestor: bool,
344 362 hg_path: Cow<'tree, HgPath>,
345 363 ) {
346 364 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
347 365 if is_ignored {
348 366 if self.options.list_ignored {
349 self.outcome.ignored.push(hg_path)
367 self.outcome.lock().unwrap().ignored.push(hg_path)
350 368 }
351 369 } else {
352 370 if self.options.list_unknown {
353 self.outcome.unknown.push(hg_path)
371 self.outcome.lock().unwrap().unknown.push(hg_path)
354 372 }
355 373 }
356 374 }
357 375 }
358 376
359 377 #[cfg(unix)] // TODO
360 378 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
361 379 // Going through `Metadata::modified()` would be portable, but would take
362 380 // care to construct a `SystemTime` value with sub-second precision just
363 381 // for us to throw that away here.
364 382 use std::os::unix::fs::MetadataExt;
365 383 metadata.mtime()
366 384 }
367 385
368 386 struct DirEntry {
369 387 base_name: HgPathBuf,
370 388 full_path: PathBuf,
371 389 metadata: std::fs::Metadata,
372 390 }
373 391
374 392 impl DirEntry {
375 393 /// Returns **unsorted** entries in the given directory, with name and
376 394 /// metadata.
377 395 ///
378 396 /// If a `.hg` sub-directory is encountered:
379 397 ///
380 398 /// * At the repository root, ignore that sub-directory
381 399 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
382 400 /// list instead.
383 401 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
384 402 let mut results = Vec::new();
385 403 for entry in path.read_dir()? {
386 404 let entry = entry?;
387 405 let metadata = entry.metadata()?;
388 406 let name = get_bytes_from_os_string(entry.file_name());
389 407 // FIXME don't do this when cached
390 408 if name == b".hg" {
391 409 if is_at_repo_root {
392 410 // Skip the repo’s own .hg (might be a symlink)
393 411 continue;
394 412 } else if metadata.is_dir() {
395 413 // A .hg sub-directory at another location means a subrepo,
396 414 // skip it entirely.
397 415 return Ok(Vec::new());
398 416 }
399 417 }
400 418 results.push(DirEntry {
401 419 base_name: name.into(),
402 420 full_path: entry.path(),
403 421 metadata,
404 422 })
405 423 }
406 424 Ok(results)
407 425 }
408 426 }
General Comments 0
You need to be logged in to leave comments. Login now