##// END OF EJS Templates
dirstate-tree: Add #[timed] attribute to `status` and `DirstateMap::read`...
Simon Sapin -
r47888:c92e6376 default
parent child Browse files
Show More
@@ -1,650 +1,652 b''
1 1 use bytes_cast::BytesCast;
2 use micro_timer::timed;
2 3 use std::path::PathBuf;
3 4 use std::{collections::BTreeMap, convert::TryInto};
4 5
5 6 use super::path_with_basename::WithBasename;
6 7 use crate::dirstate::parsers::clear_ambiguous_mtime;
7 8 use crate::dirstate::parsers::pack_entry;
8 9 use crate::dirstate::parsers::packed_entry_size;
9 10 use crate::dirstate::parsers::parse_dirstate_entries;
10 11 use crate::dirstate::parsers::parse_dirstate_parents;
11 12 use crate::dirstate::parsers::Timestamp;
12 13 use crate::matchers::Matcher;
13 14 use crate::revlog::node::NULL_NODE;
14 15 use crate::utils::hg_path::{HgPath, HgPathBuf};
15 16 use crate::CopyMapIter;
16 17 use crate::DirstateEntry;
17 18 use crate::DirstateError;
18 19 use crate::DirstateMapError;
19 20 use crate::DirstateParents;
20 21 use crate::DirstateStatus;
21 22 use crate::EntryState;
22 23 use crate::PatternFileWarning;
23 24 use crate::StateMapIter;
24 25 use crate::StatusError;
25 26 use crate::StatusOptions;
26 27
27 28 pub struct DirstateMap {
28 29 parents: Option<DirstateParents>,
29 30 dirty_parents: bool,
30 31 pub(super) root: ChildNodes,
31 32
32 33 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
33 34 nodes_with_entry_count: usize,
34 35
35 36 /// Number of nodes anywhere in the tree that have
36 37 /// `.copy_source.is_some()`.
37 38 nodes_with_copy_source_count: usize,
38 39 }
39 40
40 41 /// Using a plain `HgPathBuf` of the full path from the repository root as a
41 42 /// map key would also work: all paths in a given map have the same parent
42 43 /// path, so comparing full paths gives the same result as comparing base
43 44 /// names. However `BTreeMap` would waste time always re-comparing the same
44 45 /// string prefix.
45 46 pub(super) type ChildNodes = BTreeMap<WithBasename<HgPathBuf>, Node>;
46 47
47 48 /// Represents a file or a directory
48 49 #[derive(Default)]
49 50 pub(super) struct Node {
50 51 /// `None` for directories
51 52 pub(super) entry: Option<DirstateEntry>,
52 53
53 54 pub(super) copy_source: Option<HgPathBuf>,
54 55
55 56 pub(super) children: ChildNodes,
56 57
57 58 /// How many (non-inclusive) descendants of this node are tracked files
58 59 tracked_descendants_count: usize,
59 60 }
60 61
61 62 impl Node {
62 63 /// Whether this node has a `DirstateEntry` with `.state.is_tracked()`
63 64 fn is_tracked_file(&self) -> bool {
64 65 if let Some(entry) = &self.entry {
65 66 entry.state.is_tracked()
66 67 } else {
67 68 false
68 69 }
69 70 }
70 71
71 72 pub(super) fn state(&self) -> Option<EntryState> {
72 73 self.entry.as_ref().map(|entry| entry.state)
73 74 }
74 75 }
75 76
76 77 /// `(full_path, entry, copy_source)`
77 78 type NodeDataMut<'a> = (
78 79 &'a WithBasename<HgPathBuf>,
79 80 &'a mut Option<DirstateEntry>,
80 81 &'a mut Option<HgPathBuf>,
81 82 );
82 83
83 84 impl DirstateMap {
84 85 pub fn new() -> Self {
85 86 Self {
86 87 parents: None,
87 88 dirty_parents: false,
88 89 root: ChildNodes::new(),
89 90 nodes_with_entry_count: 0,
90 91 nodes_with_copy_source_count: 0,
91 92 }
92 93 }
93 94
94 95 fn get_node(&self, path: &HgPath) -> Option<&Node> {
95 96 let mut children = &self.root;
96 97 let mut components = path.components();
97 98 let mut component =
98 99 components.next().expect("expected at least one components");
99 100 loop {
100 101 let child = children.get(component)?;
101 102 if let Some(next_component) = components.next() {
102 103 component = next_component;
103 104 children = &child.children;
104 105 } else {
105 106 return Some(child);
106 107 }
107 108 }
108 109 }
109 110
110 111 /// Returns a mutable reference to the node at `path` if it exists
111 112 ///
112 113 /// This takes `root` instead of `&mut self` so that callers can mutate
113 114 /// other fields while the returned borrow is still valid
114 115 fn get_node_mut<'tree>(
115 116 root: &'tree mut ChildNodes,
116 117 path: &HgPath,
117 118 ) -> Option<&'tree mut Node> {
118 119 Self::each_and_get(root, path, |_| {})
119 120 }
120 121
121 122 /// Call `each` for each ancestor node of the one at `path` (not including
122 123 /// that node itself), starting from nearest the root.
123 124 ///
124 125 /// Panics (possibly after some calls to `each`) if there is no node at
125 126 /// `path`.
126 127 fn for_each_ancestor_node<'tree>(
127 128 &mut self,
128 129 path: &HgPath,
129 130 each: impl FnMut(&mut Node),
130 131 ) {
131 132 let parent = path.parent();
132 133 if !parent.is_empty() {
133 134 Self::each_and_get(&mut self.root, parent, each)
134 135 .expect("missing dirstate node");
135 136 }
136 137 }
137 138
138 139 /// Common implementation detail of `get_node_mut` and
139 140 /// `for_each_ancestor_node`
140 141 fn each_and_get<'tree>(
141 142 root: &'tree mut ChildNodes,
142 143 path: &HgPath,
143 144 mut each: impl FnMut(&mut Node),
144 145 ) -> Option<&'tree mut Node> {
145 146 let mut children = root;
146 147 let mut components = path.components();
147 148 let mut component =
148 149 components.next().expect("expected at least one components");
149 150 loop {
150 151 let child = children.get_mut(component)?;
151 152 each(child);
152 153 if let Some(next_component) = components.next() {
153 154 component = next_component;
154 155 children = &mut child.children;
155 156 } else {
156 157 return Some(child);
157 158 }
158 159 }
159 160 }
160 161
161 162 fn get_or_insert_node<'tree>(
162 163 root: &'tree mut ChildNodes,
163 164 path: &HgPath,
164 165 ) -> &'tree mut Node {
165 166 let mut child_nodes = root;
166 167 let mut inclusive_ancestor_paths =
167 168 WithBasename::inclusive_ancestors_of(path);
168 169 let mut ancestor_path = inclusive_ancestor_paths
169 170 .next()
170 171 .expect("expected at least one inclusive ancestor");
171 172 loop {
172 173 // TODO: can we avoid allocating an owned key in cases where the
173 174 // map already contains that key, without introducing double
174 175 // lookup?
175 176 let child_node =
176 177 child_nodes.entry(ancestor_path.to_owned()).or_default();
177 178 if let Some(next) = inclusive_ancestor_paths.next() {
178 179 ancestor_path = next;
179 180 child_nodes = &mut child_node.children;
180 181 } else {
181 182 return child_node;
182 183 }
183 184 }
184 185 }
185 186
186 187 /// The meaning of `new_copy_source` is:
187 188 ///
188 189 /// * `Some(Some(x))`: set `Node::copy_source` to `Some(x)`
189 190 /// * `Some(None)`: set `Node::copy_source` to `None`
190 191 /// * `None`: leave `Node::copy_source` unchanged
191 192 fn add_file_node(
192 193 &mut self,
193 194 path: &HgPath,
194 195 new_entry: DirstateEntry,
195 196 new_copy_source: Option<Option<HgPathBuf>>,
196 197 ) {
197 198 let node = Self::get_or_insert_node(&mut self.root, path);
198 199 if node.entry.is_none() {
199 200 self.nodes_with_entry_count += 1
200 201 }
201 202 if let Some(source) = &new_copy_source {
202 203 if node.copy_source.is_none() && source.is_some() {
203 204 self.nodes_with_copy_source_count += 1
204 205 }
205 206 if node.copy_source.is_some() && source.is_none() {
206 207 self.nodes_with_copy_source_count -= 1
207 208 }
208 209 }
209 210 let tracked_count_increment =
210 211 match (node.is_tracked_file(), new_entry.state.is_tracked()) {
211 212 (false, true) => 1,
212 213 (true, false) => -1,
213 214 _ => 0,
214 215 };
215 216
216 217 node.entry = Some(new_entry);
217 218 if let Some(source) = new_copy_source {
218 219 node.copy_source = source
219 220 }
220 221 // Borrow of `self.root` through `node` ends here
221 222
222 223 match tracked_count_increment {
223 224 1 => self.for_each_ancestor_node(path, |node| {
224 225 node.tracked_descendants_count += 1
225 226 }),
226 227 // We can’t use `+= -1` because the counter is unsigned
227 228 -1 => self.for_each_ancestor_node(path, |node| {
228 229 node.tracked_descendants_count -= 1
229 230 }),
230 231 _ => {}
231 232 }
232 233 }
233 234
234 235 fn iter_nodes<'a>(
235 236 &'a self,
236 237 ) -> impl Iterator<Item = (&'a WithBasename<HgPathBuf>, &'a Node)> + 'a
237 238 {
238 239 // Depth first tree traversal.
239 240 //
240 241 // If we could afford internal iteration and recursion,
241 242 // this would look like:
242 243 //
243 244 // ```
244 245 // fn traverse_children(
245 246 // children: &ChildNodes,
246 247 // each: &mut impl FnMut(&Node),
247 248 // ) {
248 249 // for child in children.values() {
249 250 // traverse_children(&child.children, each);
250 251 // each(child);
251 252 // }
252 253 // }
253 254 // ```
254 255 //
255 256 // However we want an external iterator and therefore can’t use the
256 257 // call stack. Use an explicit stack instead:
257 258 let mut stack = Vec::new();
258 259 let mut iter = self.root.iter();
259 260 std::iter::from_fn(move || {
260 261 while let Some((key, child_node)) = iter.next() {
261 262 // Pseudo-recursion
262 263 let new_iter = child_node.children.iter();
263 264 let old_iter = std::mem::replace(&mut iter, new_iter);
264 265 stack.push((key, child_node, old_iter));
265 266 }
266 267 // Found the end of a `children.iter()` iterator.
267 268 if let Some((key, child_node, next_iter)) = stack.pop() {
268 269 // "Return" from pseudo-recursion by restoring state from the
269 270 // explicit stack
270 271 iter = next_iter;
271 272
272 273 Some((key, child_node))
273 274 } else {
274 275 // Reached the bottom of the stack, we’re done
275 276 None
276 277 }
277 278 })
278 279 }
279 280
280 281 /// Mutable iterator for the `(entry, copy source)` of each node.
281 282 ///
282 283 /// It would not be safe to yield mutable references to nodes themeselves
283 284 /// with `-> impl Iterator<Item = &mut Node>` since child nodes are
284 285 /// reachable from their ancestor nodes, potentially creating multiple
285 286 /// `&mut` references to a given node.
286 287 fn iter_node_data_mut<'a>(
287 288 &'a mut self,
288 289 ) -> impl Iterator<Item = NodeDataMut<'a>> + 'a {
289 290 // Explict stack for pseudo-recursion, see `iter_nodes` above.
290 291 let mut stack = Vec::new();
291 292 let mut iter = self.root.iter_mut();
292 293 std::iter::from_fn(move || {
293 294 while let Some((key, child_node)) = iter.next() {
294 295 // Pseudo-recursion
295 296 let data =
296 297 (key, &mut child_node.entry, &mut child_node.copy_source);
297 298 let new_iter = child_node.children.iter_mut();
298 299 let old_iter = std::mem::replace(&mut iter, new_iter);
299 300 stack.push((data, old_iter));
300 301 }
301 302 // Found the end of a `children.values_mut()` iterator.
302 303 if let Some((data, next_iter)) = stack.pop() {
303 304 // "Return" from pseudo-recursion by restoring state from the
304 305 // explicit stack
305 306 iter = next_iter;
306 307
307 308 Some(data)
308 309 } else {
309 310 // Reached the bottom of the stack, we’re done
310 311 None
311 312 }
312 313 })
313 314 }
314 315 }
315 316
316 317 impl super::dispatch::DirstateMapMethods for DirstateMap {
317 318 fn clear(&mut self) {
318 319 self.set_parents(&DirstateParents {
319 320 p1: NULL_NODE,
320 321 p2: NULL_NODE,
321 322 });
322 323 self.root.clear();
323 324 self.nodes_with_entry_count = 0;
324 325 self.nodes_with_copy_source_count = 0;
325 326 }
326 327
327 328 fn add_file(
328 329 &mut self,
329 330 filename: &HgPath,
330 331 _old_state: EntryState,
331 332 entry: DirstateEntry,
332 333 ) -> Result<(), DirstateMapError> {
333 334 self.add_file_node(filename, entry, None);
334 335 Ok(())
335 336 }
336 337
337 338 fn remove_file(
338 339 &mut self,
339 340 filename: &HgPath,
340 341 _old_state: EntryState,
341 342 size: i32,
342 343 ) -> Result<(), DirstateMapError> {
343 344 let entry = DirstateEntry {
344 345 state: EntryState::Removed,
345 346 mode: 0,
346 347 size,
347 348 mtime: 0,
348 349 };
349 350 self.add_file_node(filename, entry, None);
350 351 Ok(())
351 352 }
352 353
353 354 fn drop_file(
354 355 &mut self,
355 356 filename: &HgPath,
356 357 _old_state: EntryState,
357 358 ) -> Result<bool, DirstateMapError> {
358 359 if let Some(node) = Self::get_node_mut(&mut self.root, filename) {
359 360 let was_tracked = node.is_tracked_file();
360 361 let had_entry = node.entry.is_some();
361 362 let had_copy_source = node.copy_source.is_some();
362 363
363 364 // TODO: this leaves in the tree a "non-file" node. Should we
364 365 // remove the node instead, together with ancestor nodes for
365 366 // directories that become empty?
366 367 node.entry = None;
367 368 node.copy_source = None;
368 369
369 370 if had_entry {
370 371 self.nodes_with_entry_count -= 1
371 372 }
372 373 if had_copy_source {
373 374 self.nodes_with_copy_source_count -= 1
374 375 }
375 376 if was_tracked {
376 377 self.for_each_ancestor_node(filename, |node| {
377 378 node.tracked_descendants_count -= 1
378 379 })
379 380 }
380 381 Ok(had_entry)
381 382 } else {
382 383 Ok(false)
383 384 }
384 385 }
385 386
386 387 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
387 388 for filename in filenames {
388 389 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
389 390 if let Some(entry) = node.entry.as_mut() {
390 391 clear_ambiguous_mtime(entry, now);
391 392 }
392 393 }
393 394 }
394 395 }
395 396
396 397 fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool {
397 398 self.get_node(key)
398 399 .and_then(|node| node.entry.as_ref())
399 400 .map_or(false, DirstateEntry::is_non_normal)
400 401 }
401 402
402 403 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
403 404 // Do nothing, this `DirstateMap` does not have a separate "non normal
404 405 // entries" set that need to be kept up to date
405 406 }
406 407
407 408 fn non_normal_or_other_parent_paths(
408 409 &mut self,
409 410 ) -> Box<dyn Iterator<Item = &HgPathBuf> + '_> {
410 411 Box::new(self.iter_nodes().filter_map(|(path, node)| {
411 412 node.entry
412 413 .as_ref()
413 414 .filter(|entry| {
414 415 entry.is_non_normal() || entry.is_from_other_parent()
415 416 })
416 417 .map(|_| path.full_path())
417 418 }))
418 419 }
419 420
420 421 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
421 422 // Do nothing, this `DirstateMap` does not have a separate "non normal
422 423 // entries" and "from other parent" sets that need to be recomputed
423 424 }
424 425
425 426 fn iter_non_normal_paths(
426 427 &mut self,
427 428 ) -> Box<dyn Iterator<Item = &HgPathBuf> + Send + '_> {
428 429 self.iter_non_normal_paths_panic()
429 430 }
430 431
431 432 fn iter_non_normal_paths_panic(
432 433 &self,
433 434 ) -> Box<dyn Iterator<Item = &HgPathBuf> + Send + '_> {
434 435 Box::new(self.iter_nodes().filter_map(|(path, node)| {
435 436 node.entry
436 437 .as_ref()
437 438 .filter(|entry| entry.is_non_normal())
438 439 .map(|_| path.full_path())
439 440 }))
440 441 }
441 442
442 443 fn iter_other_parent_paths(
443 444 &mut self,
444 445 ) -> Box<dyn Iterator<Item = &HgPathBuf> + Send + '_> {
445 446 Box::new(self.iter_nodes().filter_map(|(path, node)| {
446 447 node.entry
447 448 .as_ref()
448 449 .filter(|entry| entry.is_from_other_parent())
449 450 .map(|_| path.full_path())
450 451 }))
451 452 }
452 453
453 454 fn has_tracked_dir(
454 455 &mut self,
455 456 directory: &HgPath,
456 457 ) -> Result<bool, DirstateMapError> {
457 458 if let Some(node) = self.get_node(directory) {
458 459 // A node without a `DirstateEntry` was created to hold child
459 460 // nodes, and is therefore a directory.
460 461 Ok(node.entry.is_none() && node.tracked_descendants_count > 0)
461 462 } else {
462 463 Ok(false)
463 464 }
464 465 }
465 466
466 467 fn has_dir(
467 468 &mut self,
468 469 directory: &HgPath,
469 470 ) -> Result<bool, DirstateMapError> {
470 471 if let Some(node) = self.get_node(directory) {
471 472 // A node without a `DirstateEntry` was created to hold child
472 473 // nodes, and is therefore a directory.
473 474 Ok(node.entry.is_none())
474 475 } else {
475 476 Ok(false)
476 477 }
477 478 }
478 479
479 480 fn parents(
480 481 &mut self,
481 482 file_contents: &[u8],
482 483 ) -> Result<&DirstateParents, DirstateError> {
483 484 if self.parents.is_none() {
484 485 let parents = if !file_contents.is_empty() {
485 486 parse_dirstate_parents(file_contents)?.clone()
486 487 } else {
487 488 DirstateParents {
488 489 p1: NULL_NODE,
489 490 p2: NULL_NODE,
490 491 }
491 492 };
492 493 self.parents = Some(parents);
493 494 }
494 495 Ok(self.parents.as_ref().unwrap())
495 496 }
496 497
497 498 fn set_parents(&mut self, parents: &DirstateParents) {
498 499 self.parents = Some(parents.clone());
499 500 self.dirty_parents = true;
500 501 }
501 502
503 #[timed]
502 504 fn read<'a>(
503 505 &mut self,
504 506 file_contents: &'a [u8],
505 507 ) -> Result<Option<&'a DirstateParents>, DirstateError> {
506 508 if file_contents.is_empty() {
507 509 return Ok(None);
508 510 }
509 511
510 512 let parents = parse_dirstate_entries(
511 513 file_contents,
512 514 |path, entry, copy_source| {
513 515 self.add_file_node(
514 516 path,
515 517 *entry,
516 518 Some(copy_source.map(HgPath::to_owned)),
517 519 )
518 520 },
519 521 )?;
520 522
521 523 if !self.dirty_parents {
522 524 self.set_parents(parents);
523 525 }
524 526
525 527 Ok(Some(parents))
526 528 }
527 529
528 530 fn pack(
529 531 &mut self,
530 532 parents: DirstateParents,
531 533 now: Timestamp,
532 534 ) -> Result<Vec<u8>, DirstateError> {
533 535 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
534 536 // reallocations
535 537 let mut size = parents.as_bytes().len();
536 538 for (path, node) in self.iter_nodes() {
537 539 if node.entry.is_some() {
538 540 size += packed_entry_size(
539 541 path.full_path(),
540 542 node.copy_source.as_ref(),
541 543 )
542 544 }
543 545 }
544 546
545 547 let mut packed = Vec::with_capacity(size);
546 548 packed.extend(parents.as_bytes());
547 549
548 550 let now: i32 = now.0.try_into().expect("time overflow");
549 551 for (path, opt_entry, copy_source) in self.iter_node_data_mut() {
550 552 if let Some(entry) = opt_entry {
551 553 clear_ambiguous_mtime(entry, now);
552 554 pack_entry(
553 555 path.full_path(),
554 556 entry,
555 557 copy_source.as_ref(),
556 558 &mut packed,
557 559 );
558 560 }
559 561 }
560 562 self.dirty_parents = false;
561 563 Ok(packed)
562 564 }
563 565
564 566 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
565 567 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
566 568 // needs to be recomputed
567 569 Ok(())
568 570 }
569 571
570 572 fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
571 573 // Do nothing, this `DirstateMap` does not a separate `dirs` that needs
572 574 // to be recomputed
573 575 Ok(())
574 576 }
575 577
576 578 fn status<'a>(
577 579 &'a mut self,
578 580 matcher: &'a (dyn Matcher + Sync),
579 581 root_dir: PathBuf,
580 582 ignore_files: Vec<PathBuf>,
581 583 options: StatusOptions,
582 584 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
583 585 {
584 586 super::status::status(self, matcher, root_dir, ignore_files, options)
585 587 }
586 588
587 589 fn copy_map_len(&self) -> usize {
588 590 self.nodes_with_copy_source_count
589 591 }
590 592
591 593 fn copy_map_iter(&self) -> CopyMapIter<'_> {
592 594 Box::new(self.iter_nodes().filter_map(|(path, node)| {
593 595 node.copy_source
594 596 .as_ref()
595 597 .map(|copy_source| (path.full_path(), copy_source))
596 598 }))
597 599 }
598 600
599 601 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
600 602 if let Some(node) = self.get_node(key) {
601 603 node.copy_source.is_some()
602 604 } else {
603 605 false
604 606 }
605 607 }
606 608
607 609 fn copy_map_get(&self, key: &HgPath) -> Option<&HgPathBuf> {
608 610 self.get_node(key)?.copy_source.as_ref()
609 611 }
610 612
611 613 fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> {
612 614 let count = &mut self.nodes_with_copy_source_count;
613 615 Self::get_node_mut(&mut self.root, key).and_then(|node| {
614 616 if node.copy_source.is_some() {
615 617 *count -= 1
616 618 }
617 619 node.copy_source.take()
618 620 })
619 621 }
620 622
621 623 fn copy_map_insert(
622 624 &mut self,
623 625 key: HgPathBuf,
624 626 value: HgPathBuf,
625 627 ) -> Option<HgPathBuf> {
626 628 let node = Self::get_or_insert_node(&mut self.root, &key);
627 629 if node.copy_source.is_none() {
628 630 self.nodes_with_copy_source_count += 1
629 631 }
630 632 node.copy_source.replace(value)
631 633 }
632 634
633 635 fn len(&self) -> usize {
634 636 self.nodes_with_entry_count
635 637 }
636 638
637 639 fn contains_key(&self, key: &HgPath) -> bool {
638 640 self.get(key).is_some()
639 641 }
640 642
641 643 fn get(&self, key: &HgPath) -> Option<&DirstateEntry> {
642 644 self.get_node(key)?.entry.as_ref()
643 645 }
644 646
645 647 fn iter(&self) -> StateMapIter<'_> {
646 648 Box::new(self.iter_nodes().filter_map(|(path, node)| {
647 649 node.entry.as_ref().map(|entry| (path.full_path(), entry))
648 650 }))
649 651 }
650 652 }
@@ -1,426 +1,428 b''
1 1 use crate::dirstate::status::IgnoreFnType;
2 2 use crate::dirstate_tree::dirstate_map::ChildNodes;
3 3 use crate::dirstate_tree::dirstate_map::DirstateMap;
4 4 use crate::dirstate_tree::dirstate_map::Node;
5 5 use crate::matchers::get_ignore_function;
6 6 use crate::matchers::Matcher;
7 7 use crate::utils::files::get_bytes_from_os_string;
8 8 use crate::utils::hg_path::HgPath;
9 9 use crate::BadMatch;
10 10 use crate::DirstateStatus;
11 11 use crate::EntryState;
12 12 use crate::HgPathBuf;
13 13 use crate::PatternFileWarning;
14 14 use crate::StatusError;
15 15 use crate::StatusOptions;
16 use micro_timer::timed;
16 17 use rayon::prelude::*;
17 18 use std::borrow::Cow;
18 19 use std::io;
19 20 use std::path::Path;
20 21 use std::path::PathBuf;
21 22 use std::sync::Mutex;
22 23
23 24 /// Returns the status of the working directory compared to its parent
24 25 /// changeset.
25 26 ///
26 27 /// This algorithm is based on traversing the filesystem tree (`fs` in function
27 28 /// and variable names) and dirstate tree at the same time. The core of this
28 29 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
29 30 /// and its use of `itertools::merge_join_by`. When reaching a path that only
30 31 /// exists in one of the two trees, depending on information requested by
31 32 /// `options` we may need to traverse the remaining subtree.
33 #[timed]
32 34 pub fn status<'tree>(
33 35 dmap: &'tree mut DirstateMap,
34 36 matcher: &(dyn Matcher + Sync),
35 37 root_dir: PathBuf,
36 38 ignore_files: Vec<PathBuf>,
37 39 options: StatusOptions,
38 40 ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
39 41 let (ignore_fn, warnings): (IgnoreFnType, _) =
40 42 if options.list_ignored || options.list_unknown {
41 43 get_ignore_function(ignore_files, &root_dir)?
42 44 } else {
43 45 (Box::new(|&_| true), vec![])
44 46 };
45 47
46 48 let common = StatusCommon {
47 49 options,
48 50 matcher,
49 51 ignore_fn,
50 52 outcome: Mutex::new(DirstateStatus::default()),
51 53 };
52 54 let is_at_repo_root = true;
53 55 let hg_path = HgPath::new("");
54 56 let has_ignored_ancestor = false;
55 57 common.traverse_fs_directory_and_dirstate(
56 58 has_ignored_ancestor,
57 59 &mut dmap.root,
58 60 hg_path,
59 61 &root_dir,
60 62 is_at_repo_root,
61 63 );
62 64 Ok((common.outcome.into_inner().unwrap(), warnings))
63 65 }
64 66
65 67 /// Bag of random things needed by various parts of the algorithm. Reduces the
66 68 /// number of parameters passed to functions.
67 69 struct StatusCommon<'tree, 'a> {
68 70 options: StatusOptions,
69 71 matcher: &'a (dyn Matcher + Sync),
70 72 ignore_fn: IgnoreFnType<'a>,
71 73 outcome: Mutex<DirstateStatus<'tree>>,
72 74 }
73 75
74 76 impl<'tree, 'a> StatusCommon<'tree, 'a> {
75 77 fn read_dir(
76 78 &self,
77 79 hg_path: &HgPath,
78 80 fs_path: &Path,
79 81 is_at_repo_root: bool,
80 82 ) -> Result<Vec<DirEntry>, ()> {
81 83 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
82 84 let errno = error.raw_os_error().expect("expected real OS error");
83 85 self.outcome
84 86 .lock()
85 87 .unwrap()
86 88 .bad
87 89 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
88 90 })
89 91 }
90 92
91 93 fn traverse_fs_directory_and_dirstate(
92 94 &self,
93 95 has_ignored_ancestor: bool,
94 96 dirstate_nodes: &'tree mut ChildNodes,
95 97 directory_hg_path: &'tree HgPath,
96 98 directory_fs_path: &Path,
97 99 is_at_repo_root: bool,
98 100 ) {
99 101 let mut fs_entries = if let Ok(entries) = self.read_dir(
100 102 directory_hg_path,
101 103 directory_fs_path,
102 104 is_at_repo_root,
103 105 ) {
104 106 entries
105 107 } else {
106 108 return;
107 109 };
108 110
109 111 // `merge_join_by` requires both its input iterators to be sorted:
110 112
111 113 //
112 114 // * `BTreeMap` iterates according to keys’ ordering by definition
113 115
114 116 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
115 117 // https://github.com/rust-lang/rust/issues/34162
116 118 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
117 119
118 120 itertools::merge_join_by(
119 121 dirstate_nodes,
120 122 &fs_entries,
121 123 |(full_path, _node), fs_entry| {
122 124 full_path.base_name().cmp(&fs_entry.base_name)
123 125 },
124 126 )
125 127 .par_bridge()
126 128 .for_each(|pair| {
127 129 use itertools::EitherOrBoth::*;
128 130 match pair {
129 131 Both((hg_path, dirstate_node), fs_entry) => {
130 132 self.traverse_fs_and_dirstate(
131 133 fs_entry,
132 134 hg_path.full_path(),
133 135 dirstate_node,
134 136 has_ignored_ancestor,
135 137 );
136 138 }
137 139 Left((hg_path, dirstate_node)) => self.traverse_dirstate_only(
138 140 hg_path.full_path(),
139 141 dirstate_node,
140 142 ),
141 143 Right(fs_entry) => self.traverse_fs_only(
142 144 has_ignored_ancestor,
143 145 directory_hg_path,
144 146 fs_entry,
145 147 ),
146 148 }
147 149 })
148 150 }
149 151
150 152 fn traverse_fs_and_dirstate(
151 153 &self,
152 154 fs_entry: &DirEntry,
153 155 hg_path: &'tree HgPath,
154 156 dirstate_node: &'tree mut Node,
155 157 has_ignored_ancestor: bool,
156 158 ) {
157 159 let file_type = fs_entry.metadata.file_type();
158 160 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
159 161 if !file_or_symlink {
160 162 // If we previously had a file here, it was removed (with
161 163 // `hg rm` or similar) or deleted before it could be
162 164 // replaced by a directory or something else.
163 165 self.mark_removed_or_deleted_if_file(
164 166 hg_path,
165 167 dirstate_node.state(),
166 168 );
167 169 }
168 170 if file_type.is_dir() {
169 171 if self.options.collect_traversed_dirs {
170 172 self.outcome.lock().unwrap().traversed.push(hg_path.into())
171 173 }
172 174 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
173 175 let is_at_repo_root = false;
174 176 self.traverse_fs_directory_and_dirstate(
175 177 is_ignored,
176 178 &mut dirstate_node.children,
177 179 hg_path,
178 180 &fs_entry.full_path,
179 181 is_at_repo_root,
180 182 );
181 183 } else {
182 184 if file_or_symlink && self.matcher.matches(hg_path) {
183 185 let full_path = Cow::from(hg_path);
184 186 if let Some(entry) = &dirstate_node.entry {
185 187 match entry.state {
186 188 EntryState::Added => {
187 189 self.outcome.lock().unwrap().added.push(full_path)
188 190 }
189 191 EntryState::Removed => self
190 192 .outcome
191 193 .lock()
192 194 .unwrap()
193 195 .removed
194 196 .push(full_path),
195 197 EntryState::Merged => self
196 198 .outcome
197 199 .lock()
198 200 .unwrap()
199 201 .modified
200 202 .push(full_path),
201 203 EntryState::Normal => {
202 204 self.handle_normal_file(
203 205 full_path,
204 206 dirstate_node,
205 207 entry,
206 208 fs_entry,
207 209 );
208 210 }
209 211 // This variant is not used in DirstateMap
210 212 // nodes
211 213 EntryState::Unknown => unreachable!(),
212 214 }
213 215 } else {
214 216 // `node.entry.is_none()` indicates a "directory"
215 217 // node, but the filesystem has a file
216 218 self.mark_unknown_or_ignored(
217 219 has_ignored_ancestor,
218 220 full_path,
219 221 )
220 222 }
221 223 }
222 224
223 225 for (child_hg_path, child_node) in &mut dirstate_node.children {
224 226 self.traverse_dirstate_only(
225 227 child_hg_path.full_path(),
226 228 child_node,
227 229 )
228 230 }
229 231 }
230 232 }
231 233
232 234 /// A file with `EntryState::Normal` in the dirstate was found in the
233 235 /// filesystem
234 236 fn handle_normal_file(
235 237 &self,
236 238 full_path: Cow<'tree, HgPath>,
237 239 dirstate_node: &Node,
238 240 entry: &crate::DirstateEntry,
239 241 fs_entry: &DirEntry,
240 242 ) {
241 243 // Keep the low 31 bits
242 244 fn truncate_u64(value: u64) -> i32 {
243 245 (value & 0x7FFF_FFFF) as i32
244 246 }
245 247 fn truncate_i64(value: i64) -> i32 {
246 248 (value & 0x7FFF_FFFF) as i32
247 249 }
248 250
249 251 let mode_changed = || {
250 252 self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
251 253 };
252 254 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
253 255 if entry.size >= 0
254 256 && size_changed
255 257 && fs_entry.metadata.file_type().is_symlink()
256 258 {
257 259 // issue6456: Size returned may be longer due to encryption
258 260 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
259 261 self.outcome.lock().unwrap().unsure.push(full_path)
260 262 } else if dirstate_node.copy_source.is_some()
261 263 || entry.is_from_other_parent()
262 264 || (entry.size >= 0 && (size_changed || mode_changed()))
263 265 {
264 266 self.outcome.lock().unwrap().modified.push(full_path)
265 267 } else {
266 268 let mtime = mtime_seconds(&fs_entry.metadata);
267 269 if truncate_i64(mtime) != entry.mtime
268 270 || mtime == self.options.last_normal_time
269 271 {
270 272 self.outcome.lock().unwrap().unsure.push(full_path)
271 273 } else if self.options.list_clean {
272 274 self.outcome.lock().unwrap().clean.push(full_path)
273 275 }
274 276 }
275 277 }
276 278
277 279 /// A node in the dirstate tree has no corresponding filesystem entry
278 280 fn traverse_dirstate_only(
279 281 &self,
280 282 hg_path: &'tree HgPath,
281 283 dirstate_node: &'tree mut Node,
282 284 ) {
283 285 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state());
284 286 dirstate_node.children.par_iter_mut().for_each(
285 287 |(child_hg_path, child_node)| {
286 288 self.traverse_dirstate_only(
287 289 child_hg_path.full_path(),
288 290 child_node,
289 291 )
290 292 },
291 293 )
292 294 }
293 295
294 296 /// A node in the dirstate tree has no corresponding *file* on the
295 297 /// filesystem
296 298 ///
297 299 /// Does nothing on a "directory" node
298 300 fn mark_removed_or_deleted_if_file(
299 301 &self,
300 302 hg_path: &'tree HgPath,
301 303 dirstate_node_state: Option<EntryState>,
302 304 ) {
303 305 if let Some(state) = dirstate_node_state {
304 306 if self.matcher.matches(hg_path) {
305 307 if let EntryState::Removed = state {
306 308 self.outcome.lock().unwrap().removed.push(hg_path.into())
307 309 } else {
308 310 self.outcome.lock().unwrap().deleted.push(hg_path.into())
309 311 }
310 312 }
311 313 }
312 314 }
313 315
314 316 /// Something in the filesystem has no corresponding dirstate node
315 317 fn traverse_fs_only(
316 318 &self,
317 319 has_ignored_ancestor: bool,
318 320 directory_hg_path: &HgPath,
319 321 fs_entry: &DirEntry,
320 322 ) {
321 323 let hg_path = directory_hg_path.join(&fs_entry.base_name);
322 324 let file_type = fs_entry.metadata.file_type();
323 325 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
324 326 if file_type.is_dir() {
325 327 let is_ignored =
326 328 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
327 329 let traverse_children = if is_ignored {
328 330 // Descendants of an ignored directory are all ignored
329 331 self.options.list_ignored
330 332 } else {
331 333 // Descendants of an unknown directory may be either unknown or
332 334 // ignored
333 335 self.options.list_unknown || self.options.list_ignored
334 336 };
335 337 if traverse_children {
336 338 let is_at_repo_root = false;
337 339 if let Ok(children_fs_entries) = self.read_dir(
338 340 &hg_path,
339 341 &fs_entry.full_path,
340 342 is_at_repo_root,
341 343 ) {
342 344 children_fs_entries.par_iter().for_each(|child_fs_entry| {
343 345 self.traverse_fs_only(
344 346 is_ignored,
345 347 &hg_path,
346 348 child_fs_entry,
347 349 )
348 350 })
349 351 }
350 352 }
351 353 if self.options.collect_traversed_dirs {
352 354 self.outcome.lock().unwrap().traversed.push(hg_path.into())
353 355 }
354 356 } else if file_or_symlink && self.matcher.matches(&hg_path) {
355 357 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
356 358 }
357 359 }
358 360
359 361 fn mark_unknown_or_ignored(
360 362 &self,
361 363 has_ignored_ancestor: bool,
362 364 hg_path: Cow<'tree, HgPath>,
363 365 ) {
364 366 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
365 367 if is_ignored {
366 368 if self.options.list_ignored {
367 369 self.outcome.lock().unwrap().ignored.push(hg_path)
368 370 }
369 371 } else {
370 372 if self.options.list_unknown {
371 373 self.outcome.lock().unwrap().unknown.push(hg_path)
372 374 }
373 375 }
374 376 }
375 377 }
376 378
377 379 #[cfg(unix)] // TODO
378 380 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
379 381 // Going through `Metadata::modified()` would be portable, but would take
380 382 // care to construct a `SystemTime` value with sub-second precision just
381 383 // for us to throw that away here.
382 384 use std::os::unix::fs::MetadataExt;
383 385 metadata.mtime()
384 386 }
385 387
386 388 struct DirEntry {
387 389 base_name: HgPathBuf,
388 390 full_path: PathBuf,
389 391 metadata: std::fs::Metadata,
390 392 }
391 393
392 394 impl DirEntry {
393 395 /// Returns **unsorted** entries in the given directory, with name and
394 396 /// metadata.
395 397 ///
396 398 /// If a `.hg` sub-directory is encountered:
397 399 ///
398 400 /// * At the repository root, ignore that sub-directory
399 401 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
400 402 /// list instead.
401 403 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
402 404 let mut results = Vec::new();
403 405 for entry in path.read_dir()? {
404 406 let entry = entry?;
405 407 let metadata = entry.metadata()?;
406 408 let name = get_bytes_from_os_string(entry.file_name());
407 409 // FIXME don't do this when cached
408 410 if name == b".hg" {
409 411 if is_at_repo_root {
410 412 // Skip the repo’s own .hg (might be a symlink)
411 413 continue;
412 414 } else if metadata.is_dir() {
413 415 // A .hg sub-directory at another location means a subrepo,
414 416 // skip it entirely.
415 417 return Ok(Vec::new());
416 418 }
417 419 }
418 420 results.push(DirEntry {
419 421 base_name: name.into(),
420 422 full_path: entry.path(),
421 423 metadata,
422 424 })
423 425 }
424 426 Ok(results)
425 427 }
426 428 }
General Comments 0
You need to be logged in to leave comments. Login now