Show More
@@ -29,6 +29,10 b' use crate::StateMapIter;' | |||
|
29 | 29 | use crate::StatusError; |
|
30 | 30 | use crate::StatusOptions; |
|
31 | 31 | |
|
32 | /// Append to an existing data file if the amount of unreachable data (not used | |
|
33 | /// anymore) is less than this fraction of the total amount of existing data. | |
|
34 | const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5; | |
|
35 | ||
|
32 | 36 | pub struct DirstateMap<'on_disk> { |
|
33 | 37 | /// Contents of the `.hg/dirstate` file |
|
34 | 38 | pub(super) on_disk: &'on_disk [u8], |
@@ -44,6 +48,9 b" pub struct DirstateMap<'on_disk> {" | |||
|
44 | 48 | |
|
45 | 49 | /// See on_disk::Header |
|
46 | 50 | pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash, |
|
51 | ||
|
52 | /// How many bytes of `on_disk` are not used anymore | |
|
53 | pub(super) unreachable_bytes: u32, | |
|
47 | 54 | } |
|
48 | 55 | |
|
49 | 56 | /// Using a plain `HgPathBuf` of the full path from the repository root as a |
@@ -118,9 +125,10 b" impl<'on_disk> ChildNodes<'on_disk> {" | |||
|
118 | 125 | } |
|
119 | 126 | } |
|
120 | 127 | |
|
121 |
|
|
|
128 | fn make_mut( | |
|
122 | 129 | &mut self, |
|
123 | 130 | on_disk: &'on_disk [u8], |
|
131 | unreachable_bytes: &mut u32, | |
|
124 | 132 | ) -> Result< |
|
125 | 133 | &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>, |
|
126 | 134 | DirstateV2ParseError, |
@@ -128,6 +136,8 b" impl<'on_disk> ChildNodes<'on_disk> {" | |||
|
128 | 136 | match self { |
|
129 | 137 | ChildNodes::InMemory(nodes) => Ok(nodes), |
|
130 | 138 | ChildNodes::OnDisk(nodes) => { |
|
139 | *unreachable_bytes += | |
|
140 | std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32; | |
|
131 | 141 | let nodes = nodes |
|
132 | 142 | .iter() |
|
133 | 143 | .map(|node| { |
@@ -406,6 +416,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
406 | 416 | nodes_with_entry_count: 0, |
|
407 | 417 | nodes_with_copy_source_count: 0, |
|
408 | 418 | ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN], |
|
419 | unreachable_bytes: 0, | |
|
409 | 420 | } |
|
410 | 421 | } |
|
411 | 422 | |
@@ -436,6 +447,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
436 | 447 | let tracked = entry.state.is_tracked(); |
|
437 | 448 | let node = Self::get_or_insert_node( |
|
438 | 449 | map.on_disk, |
|
450 | &mut map.unreachable_bytes, | |
|
439 | 451 | &mut map.root, |
|
440 | 452 | path, |
|
441 | 453 | WithBasename::to_cow_borrowed, |
@@ -472,18 +484,8 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
472 | 484 | /// append to the existing data file that contains `self.on_disk` (true), |
|
473 | 485 | /// or create a new data file from scratch (false). |
|
474 | 486 | pub(super) fn write_should_append(&self) -> bool { |
|
475 | // Soon this will be a heuristic based on the amount of unreachable | |
|
476 | // data. For now it’s pseudo-random in order to make tests exercise | |
|
477 | // both code paths. | |
|
478 | ||
|
479 | fn bad_rng() -> u32 { | |
|
480 | std::time::SystemTime::now() | |
|
481 | .duration_since(std::time::UNIX_EPOCH) | |
|
482 | .unwrap() | |
|
483 | .subsec_millis() | |
|
484 | } | |
|
485 | ||
|
486 | bad_rng() % 2 == 0 | |
|
487 | let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32; | |
|
488 | ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO | |
|
487 | 489 | } |
|
488 | 490 | |
|
489 | 491 | fn get_node<'tree>( |
@@ -514,6 +516,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
514 | 516 | /// other fields while the returned borrow is still valid |
|
515 | 517 | fn get_node_mut<'tree>( |
|
516 | 518 | on_disk: &'on_disk [u8], |
|
519 | unreachable_bytes: &mut u32, | |
|
517 | 520 | root: &'tree mut ChildNodes<'on_disk>, |
|
518 | 521 | path: &HgPath, |
|
519 | 522 | ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> { |
@@ -522,7 +525,9 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
522 | 525 | let mut component = |
|
523 | 526 | components.next().expect("expected at least one components"); |
|
524 | 527 | loop { |
|
525 |
if let Some(child) = children |
|
|
528 | if let Some(child) = children | |
|
529 | .make_mut(on_disk, unreachable_bytes)? | |
|
530 | .get_mut(component) | |
|
526 | 531 | { |
|
527 | 532 | if let Some(next_component) = components.next() { |
|
528 | 533 | component = next_component; |
@@ -542,6 +547,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
542 | 547 | ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> { |
|
543 | 548 | Self::get_or_insert_node( |
|
544 | 549 | self.on_disk, |
|
550 | &mut self.unreachable_bytes, | |
|
545 | 551 | &mut self.root, |
|
546 | 552 | path, |
|
547 | 553 | WithBasename::to_cow_owned, |
@@ -549,8 +555,9 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
549 | 555 | ) |
|
550 | 556 | } |
|
551 | 557 | |
|
552 |
|
|
|
558 | fn get_or_insert_node<'tree, 'path>( | |
|
553 | 559 | on_disk: &'on_disk [u8], |
|
560 | unreachable_bytes: &mut u32, | |
|
554 | 561 | root: &'tree mut ChildNodes<'on_disk>, |
|
555 | 562 | path: &'path HgPath, |
|
556 | 563 | to_cow: impl Fn( |
@@ -569,7 +576,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
569 | 576 | // map already contains that key, without introducing double |
|
570 | 577 | // lookup? |
|
571 | 578 | let child_node = child_nodes |
|
572 | .make_mut(on_disk)? | |
|
579 | .make_mut(on_disk, unreachable_bytes)? | |
|
573 | 580 | .entry(to_cow(ancestor_path)) |
|
574 | 581 | .or_default(); |
|
575 | 582 | if let Some(next) = inclusive_ancestor_paths.next() { |
@@ -598,6 +605,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
598 | 605 | |
|
599 | 606 | let node = Self::get_or_insert_node( |
|
600 | 607 | self.on_disk, |
|
608 | &mut self.unreachable_bytes, | |
|
601 | 609 | &mut self.root, |
|
602 | 610 | path, |
|
603 | 611 | WithBasename::to_cow_owned, |
@@ -681,6 +689,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
681 | 689 | for path in paths { |
|
682 | 690 | if let Some(node) = Self::get_node_mut( |
|
683 | 691 | self.on_disk, |
|
692 | &mut self.unreachable_bytes, | |
|
684 | 693 | &mut self.root, |
|
685 | 694 | path.as_ref(), |
|
686 | 695 | )? { |
@@ -712,6 +721,12 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
712 | 721 | Ok(None) |
|
713 | 722 | }) |
|
714 | 723 | } |
|
724 | ||
|
725 | fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) { | |
|
726 | if let Cow::Borrowed(path) = path { | |
|
727 | *unreachable_bytes += path.len() as u32 | |
|
728 | } | |
|
729 | } | |
|
715 | 730 | } |
|
716 | 731 | |
|
717 | 732 | /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s. |
@@ -844,13 +859,14 b" impl<'on_disk> super::dispatch::Dirstate" | |||
|
844 | 859 | /// removed a node in `nodes`. |
|
845 | 860 | fn recur<'on_disk>( |
|
846 | 861 | on_disk: &'on_disk [u8], |
|
862 | unreachable_bytes: &mut u32, | |
|
847 | 863 | nodes: &mut ChildNodes<'on_disk>, |
|
848 | 864 | path: &HgPath, |
|
849 | 865 | ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> { |
|
850 | 866 | let (first_path_component, rest_of_path) = |
|
851 | 867 | path.split_first_component(); |
|
852 | let node = if let Some(node) = | |
|
853 |
nodes |
|
|
868 | let nodes = nodes.make_mut(on_disk, unreachable_bytes)?; | |
|
869 | let node = if let Some(node) = nodes.get_mut(first_path_component) | |
|
854 | 870 | { |
|
855 | 871 | node |
|
856 | 872 | } else { |
@@ -858,9 +874,12 b" impl<'on_disk> super::dispatch::Dirstate" | |||
|
858 | 874 | }; |
|
859 | 875 | let dropped; |
|
860 | 876 | if let Some(rest) = rest_of_path { |
|
861 | if let Some((d, removed)) = | |
|
862 |
|
|
|
863 | { | |
|
877 | if let Some((d, removed)) = recur( | |
|
878 | on_disk, | |
|
879 | unreachable_bytes, | |
|
880 | &mut node.children, | |
|
881 | rest, | |
|
882 | )? { | |
|
864 | 883 | dropped = d; |
|
865 | 884 | if dropped.had_entry { |
|
866 | 885 | node.descendants_with_entry_count -= 1; |
@@ -884,6 +903,9 b" impl<'on_disk> super::dispatch::Dirstate" | |||
|
884 | 903 | if had_entry { |
|
885 | 904 | node.data = NodeData::None |
|
886 | 905 | } |
|
906 | if let Some(source) = &node.copy_source { | |
|
907 | DirstateMap::count_dropped_path(unreachable_bytes, source) | |
|
908 | } | |
|
887 | 909 | dropped = Dropped { |
|
888 | 910 | was_tracked: node |
|
889 | 911 | .data |
@@ -899,14 +921,22 b" impl<'on_disk> super::dispatch::Dirstate" | |||
|
899 | 921 | && node.copy_source.is_none() |
|
900 | 922 | && node.children.is_empty(); |
|
901 | 923 | if remove { |
|
902 | nodes.make_mut(on_disk)?.remove(first_path_component); | |
|
924 | let (key, _) = | |
|
925 | nodes.remove_entry(first_path_component).unwrap(); | |
|
926 | DirstateMap::count_dropped_path( | |
|
927 | unreachable_bytes, | |
|
928 | key.full_path(), | |
|
929 | ) | |
|
903 | 930 | } |
|
904 | 931 | Ok(Some((dropped, remove))) |
|
905 | 932 | } |
|
906 | 933 | |
|
907 | if let Some((dropped, _removed)) = | |
|
908 |
|
|
|
909 | { | |
|
934 | if let Some((dropped, _removed)) = recur( | |
|
935 | self.on_disk, | |
|
936 | &mut self.unreachable_bytes, | |
|
937 | &mut self.root, | |
|
938 | filename, | |
|
939 | )? { | |
|
910 | 940 | if dropped.had_entry { |
|
911 | 941 | self.nodes_with_entry_count -= 1 |
|
912 | 942 | } |
@@ -926,9 +956,12 b" impl<'on_disk> super::dispatch::Dirstate" | |||
|
926 | 956 | now: i32, |
|
927 | 957 | ) -> Result<(), DirstateV2ParseError> { |
|
928 | 958 | for filename in filenames { |
|
929 | if let Some(node) = | |
|
930 | Self::get_node_mut(self.on_disk, &mut self.root, &filename)? | |
|
931 | { | |
|
959 | if let Some(node) = Self::get_node_mut( | |
|
960 | self.on_disk, | |
|
961 | &mut self.unreachable_bytes, | |
|
962 | &mut self.root, | |
|
963 | &filename, | |
|
964 | )? { | |
|
932 | 965 | if let NodeData::Entry(entry) = &mut node.data { |
|
933 | 966 | entry.clear_ambiguous_mtime(now); |
|
934 | 967 | } |
@@ -1144,16 +1177,20 b" impl<'on_disk> super::dispatch::Dirstate" | |||
|
1144 | 1177 | key: &HgPath, |
|
1145 | 1178 | ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { |
|
1146 | 1179 | let count = &mut self.nodes_with_copy_source_count; |
|
1147 | Ok( | |
|
1148 | Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then( | |
|
1149 | |node| { | |
|
1150 | if node.copy_source.is_some() { | |
|
1151 | *count -= 1 | |
|
1152 |
|
|
|
1153 | node.copy_source.take().map(Cow::into_owned) | |
|
1154 | }, | |
|
1155 | ), | |
|
1156 | ) | |
|
1180 | let unreachable_bytes = &mut self.unreachable_bytes; | |
|
1181 | Ok(Self::get_node_mut( | |
|
1182 | self.on_disk, | |
|
1183 | unreachable_bytes, | |
|
1184 | &mut self.root, | |
|
1185 | key, | |
|
1186 | )? | |
|
1187 | .and_then(|node| { | |
|
1188 | if let Some(source) = &node.copy_source { | |
|
1189 | *count -= 1; | |
|
1190 | Self::count_dropped_path(unreachable_bytes, source); | |
|
1191 | } | |
|
1192 | node.copy_source.take().map(Cow::into_owned) | |
|
1193 | })) | |
|
1157 | 1194 | } |
|
1158 | 1195 | |
|
1159 | 1196 | fn copy_map_insert( |
@@ -1163,6 +1200,7 b" impl<'on_disk> super::dispatch::Dirstate" | |||
|
1163 | 1200 | ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { |
|
1164 | 1201 | let node = Self::get_or_insert_node( |
|
1165 | 1202 | self.on_disk, |
|
1203 | &mut self.unreachable_bytes, | |
|
1166 | 1204 | &mut self.root, |
|
1167 | 1205 | &key, |
|
1168 | 1206 | WithBasename::to_cow_owned, |
@@ -73,6 +73,9 b' struct Root {' | |||
|
73 | 73 | nodes_with_entry_count: Size, |
|
74 | 74 | nodes_with_copy_source_count: Size, |
|
75 | 75 | |
|
76 | /// How many bytes of this data file are not used anymore | |
|
77 | unreachable_bytes: Size, | |
|
78 | ||
|
76 | 79 | /// If non-zero, a hash of ignore files that were used for some previous |
|
77 | 80 | /// run of the `status` algorithm. |
|
78 | 81 | /// |
@@ -205,7 +208,7 b' type OptPathSlice = PathSlice;' | |||
|
205 | 208 | /// Make sure that size-affecting changes are made knowingly |
|
206 | 209 | fn _static_assert_size_of() { |
|
207 | 210 | let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; |
|
208 |
let _ = std::mem::transmute::<Root, [u8; |
|
|
211 | let _ = std::mem::transmute::<Root, [u8; 40]>; | |
|
209 | 212 | let _ = std::mem::transmute::<Node, [u8; 43]>; |
|
210 | 213 | } |
|
211 | 214 | |
@@ -283,6 +286,9 b" pub(super) fn read<'on_disk>(" | |||
|
283 | 286 | return Ok(DirstateMap::empty(on_disk)); |
|
284 | 287 | } |
|
285 | 288 | let root = read_root(on_disk)?; |
|
289 | let mut unreachable_bytes = root.unreachable_bytes.get(); | |
|
290 | // Each append writes a new `Root`, so it’s never reused | |
|
291 | unreachable_bytes += std::mem::size_of::<Root>() as u32; | |
|
286 | 292 | let dirstate_map = DirstateMap { |
|
287 | 293 | on_disk, |
|
288 | 294 | root: dirstate_map::ChildNodes::OnDisk(read_nodes( |
@@ -292,6 +298,7 b" pub(super) fn read<'on_disk>(" | |||
|
292 | 298 | nodes_with_entry_count: root.nodes_with_entry_count.get(), |
|
293 | 299 | nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(), |
|
294 | 300 | ignore_patterns_hash: root.ignore_patterns_hash, |
|
301 | unreachable_bytes, | |
|
295 | 302 | }; |
|
296 | 303 | Ok(dirstate_map) |
|
297 | 304 | } |
@@ -573,6 +580,7 b' pub(super) fn write(' | |||
|
573 | 580 | nodes_with_copy_source_count: dirstate_map |
|
574 | 581 | .nodes_with_copy_source_count |
|
575 | 582 | .into(), |
|
583 | unreachable_bytes: dirstate_map.unreachable_bytes.into(), | |
|
576 | 584 | ignore_patterns_hash: dirstate_map.ignore_patterns_hash, |
|
577 | 585 | }; |
|
578 | 586 | writer.out.extend(root.as_bytes()); |
General Comments 0
You need to be logged in to leave comments.
Login now