##// END OF EJS Templates
dirstate-v2: Add heuristic for when to create a new data file...
Simon Sapin -
r48481:d9411836 default
parent child Browse files
Show More
@@ -29,6 +29,10 b' use crate::StateMapIter;'
29 29 use crate::StatusError;
30 30 use crate::StatusOptions;
31 31
32 /// Append to an existing data file if the amount of unreachable data (not used
33 /// anymore) is less than this fraction of the total amount of existing data.
34 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
35
32 36 pub struct DirstateMap<'on_disk> {
33 37 /// Contents of the `.hg/dirstate` file
34 38 pub(super) on_disk: &'on_disk [u8],
@@ -44,6 +48,9 b" pub struct DirstateMap<'on_disk> {"
44 48
45 49 /// See on_disk::Header
46 50 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
51
52 /// How many bytes of `on_disk` are not used anymore
53 pub(super) unreachable_bytes: u32,
47 54 }
48 55
49 56 /// Using a plain `HgPathBuf` of the full path from the repository root as a
@@ -118,9 +125,10 b" impl<'on_disk> ChildNodes<'on_disk> {"
118 125 }
119 126 }
120 127
121 pub(super) fn make_mut(
128 fn make_mut(
122 129 &mut self,
123 130 on_disk: &'on_disk [u8],
131 unreachable_bytes: &mut u32,
124 132 ) -> Result<
125 133 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
126 134 DirstateV2ParseError,
@@ -128,6 +136,8 b" impl<'on_disk> ChildNodes<'on_disk> {"
128 136 match self {
129 137 ChildNodes::InMemory(nodes) => Ok(nodes),
130 138 ChildNodes::OnDisk(nodes) => {
139 *unreachable_bytes +=
140 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
131 141 let nodes = nodes
132 142 .iter()
133 143 .map(|node| {
@@ -406,6 +416,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
406 416 nodes_with_entry_count: 0,
407 417 nodes_with_copy_source_count: 0,
408 418 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
419 unreachable_bytes: 0,
409 420 }
410 421 }
411 422
@@ -436,6 +447,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
436 447 let tracked = entry.state.is_tracked();
437 448 let node = Self::get_or_insert_node(
438 449 map.on_disk,
450 &mut map.unreachable_bytes,
439 451 &mut map.root,
440 452 path,
441 453 WithBasename::to_cow_borrowed,
@@ -472,18 +484,8 b" impl<'on_disk> DirstateMap<'on_disk> {"
472 484 /// append to the existing data file that contains `self.on_disk` (true),
473 485 /// or create a new data file from scratch (false).
474 486 pub(super) fn write_should_append(&self) -> bool {
475 // Soon this will be a heuristic based on the amount of unreachable
476 // data. For now it’s pseudo-random in order to make tests exercise
477 // both code paths.
478
479 fn bad_rng() -> u32 {
480 std::time::SystemTime::now()
481 .duration_since(std::time::UNIX_EPOCH)
482 .unwrap()
483 .subsec_millis()
484 }
485
486 bad_rng() % 2 == 0
487 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
488 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
487 489 }
488 490
489 491 fn get_node<'tree>(
@@ -514,6 +516,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
514 516 /// other fields while the returned borrow is still valid
515 517 fn get_node_mut<'tree>(
516 518 on_disk: &'on_disk [u8],
519 unreachable_bytes: &mut u32,
517 520 root: &'tree mut ChildNodes<'on_disk>,
518 521 path: &HgPath,
519 522 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
@@ -522,7 +525,9 b" impl<'on_disk> DirstateMap<'on_disk> {"
522 525 let mut component =
523 526 components.next().expect("expected at least one components");
524 527 loop {
525 if let Some(child) = children.make_mut(on_disk)?.get_mut(component)
528 if let Some(child) = children
529 .make_mut(on_disk, unreachable_bytes)?
530 .get_mut(component)
526 531 {
527 532 if let Some(next_component) = components.next() {
528 533 component = next_component;
@@ -542,6 +547,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
542 547 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
543 548 Self::get_or_insert_node(
544 549 self.on_disk,
550 &mut self.unreachable_bytes,
545 551 &mut self.root,
546 552 path,
547 553 WithBasename::to_cow_owned,
@@ -549,8 +555,9 b" impl<'on_disk> DirstateMap<'on_disk> {"
549 555 )
550 556 }
551 557
552 pub(super) fn get_or_insert_node<'tree, 'path>(
558 fn get_or_insert_node<'tree, 'path>(
553 559 on_disk: &'on_disk [u8],
560 unreachable_bytes: &mut u32,
554 561 root: &'tree mut ChildNodes<'on_disk>,
555 562 path: &'path HgPath,
556 563 to_cow: impl Fn(
@@ -569,7 +576,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
569 576 // map already contains that key, without introducing double
570 577 // lookup?
571 578 let child_node = child_nodes
572 .make_mut(on_disk)?
579 .make_mut(on_disk, unreachable_bytes)?
573 580 .entry(to_cow(ancestor_path))
574 581 .or_default();
575 582 if let Some(next) = inclusive_ancestor_paths.next() {
@@ -598,6 +605,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
598 605
599 606 let node = Self::get_or_insert_node(
600 607 self.on_disk,
608 &mut self.unreachable_bytes,
601 609 &mut self.root,
602 610 path,
603 611 WithBasename::to_cow_owned,
@@ -681,6 +689,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
681 689 for path in paths {
682 690 if let Some(node) = Self::get_node_mut(
683 691 self.on_disk,
692 &mut self.unreachable_bytes,
684 693 &mut self.root,
685 694 path.as_ref(),
686 695 )? {
@@ -712,6 +721,12 b" impl<'on_disk> DirstateMap<'on_disk> {"
712 721 Ok(None)
713 722 })
714 723 }
724
725 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
726 if let Cow::Borrowed(path) = path {
727 *unreachable_bytes += path.len() as u32
728 }
729 }
715 730 }
716 731
717 732 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
@@ -844,13 +859,14 b" impl<'on_disk> super::dispatch::Dirstate"
844 859 /// removed a node in `nodes`.
845 860 fn recur<'on_disk>(
846 861 on_disk: &'on_disk [u8],
862 unreachable_bytes: &mut u32,
847 863 nodes: &mut ChildNodes<'on_disk>,
848 864 path: &HgPath,
849 865 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
850 866 let (first_path_component, rest_of_path) =
851 867 path.split_first_component();
852 let node = if let Some(node) =
853 nodes.make_mut(on_disk)?.get_mut(first_path_component)
868 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
869 let node = if let Some(node) = nodes.get_mut(first_path_component)
854 870 {
855 871 node
856 872 } else {
@@ -858,9 +874,12 b" impl<'on_disk> super::dispatch::Dirstate"
858 874 };
859 875 let dropped;
860 876 if let Some(rest) = rest_of_path {
861 if let Some((d, removed)) =
862 recur(on_disk, &mut node.children, rest)?
863 {
877 if let Some((d, removed)) = recur(
878 on_disk,
879 unreachable_bytes,
880 &mut node.children,
881 rest,
882 )? {
864 883 dropped = d;
865 884 if dropped.had_entry {
866 885 node.descendants_with_entry_count -= 1;
@@ -884,6 +903,9 b" impl<'on_disk> super::dispatch::Dirstate"
884 903 if had_entry {
885 904 node.data = NodeData::None
886 905 }
906 if let Some(source) = &node.copy_source {
907 DirstateMap::count_dropped_path(unreachable_bytes, source)
908 }
887 909 dropped = Dropped {
888 910 was_tracked: node
889 911 .data
@@ -899,14 +921,22 b" impl<'on_disk> super::dispatch::Dirstate"
899 921 && node.copy_source.is_none()
900 922 && node.children.is_empty();
901 923 if remove {
902 nodes.make_mut(on_disk)?.remove(first_path_component);
924 let (key, _) =
925 nodes.remove_entry(first_path_component).unwrap();
926 DirstateMap::count_dropped_path(
927 unreachable_bytes,
928 key.full_path(),
929 )
903 930 }
904 931 Ok(Some((dropped, remove)))
905 932 }
906 933
907 if let Some((dropped, _removed)) =
908 recur(self.on_disk, &mut self.root, filename)?
909 {
934 if let Some((dropped, _removed)) = recur(
935 self.on_disk,
936 &mut self.unreachable_bytes,
937 &mut self.root,
938 filename,
939 )? {
910 940 if dropped.had_entry {
911 941 self.nodes_with_entry_count -= 1
912 942 }
@@ -926,9 +956,12 b" impl<'on_disk> super::dispatch::Dirstate"
926 956 now: i32,
927 957 ) -> Result<(), DirstateV2ParseError> {
928 958 for filename in filenames {
929 if let Some(node) =
930 Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
931 {
959 if let Some(node) = Self::get_node_mut(
960 self.on_disk,
961 &mut self.unreachable_bytes,
962 &mut self.root,
963 &filename,
964 )? {
932 965 if let NodeData::Entry(entry) = &mut node.data {
933 966 entry.clear_ambiguous_mtime(now);
934 967 }
@@ -1144,16 +1177,20 b" impl<'on_disk> super::dispatch::Dirstate"
1144 1177 key: &HgPath,
1145 1178 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1146 1179 let count = &mut self.nodes_with_copy_source_count;
1147 Ok(
1148 Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then(
1149 |node| {
1150 if node.copy_source.is_some() {
1151 *count -= 1
1152 }
1153 node.copy_source.take().map(Cow::into_owned)
1154 },
1155 ),
1156 )
1180 let unreachable_bytes = &mut self.unreachable_bytes;
1181 Ok(Self::get_node_mut(
1182 self.on_disk,
1183 unreachable_bytes,
1184 &mut self.root,
1185 key,
1186 )?
1187 .and_then(|node| {
1188 if let Some(source) = &node.copy_source {
1189 *count -= 1;
1190 Self::count_dropped_path(unreachable_bytes, source);
1191 }
1192 node.copy_source.take().map(Cow::into_owned)
1193 }))
1157 1194 }
1158 1195
1159 1196 fn copy_map_insert(
@@ -1163,6 +1200,7 b" impl<'on_disk> super::dispatch::Dirstate"
1163 1200 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1164 1201 let node = Self::get_or_insert_node(
1165 1202 self.on_disk,
1203 &mut self.unreachable_bytes,
1166 1204 &mut self.root,
1167 1205 &key,
1168 1206 WithBasename::to_cow_owned,
@@ -73,6 +73,9 b' struct Root {'
73 73 nodes_with_entry_count: Size,
74 74 nodes_with_copy_source_count: Size,
75 75
76 /// How many bytes of this data file are not used anymore
77 unreachable_bytes: Size,
78
76 79 /// If non-zero, a hash of ignore files that were used for some previous
77 80 /// run of the `status` algorithm.
78 81 ///
@@ -205,7 +208,7 b' type OptPathSlice = PathSlice;'
205 208 /// Make sure that size-affecting changes are made knowingly
206 209 fn _static_assert_size_of() {
207 210 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
208 let _ = std::mem::transmute::<Root, [u8; 36]>;
211 let _ = std::mem::transmute::<Root, [u8; 40]>;
209 212 let _ = std::mem::transmute::<Node, [u8; 43]>;
210 213 }
211 214
@@ -283,6 +286,9 b" pub(super) fn read<'on_disk>("
283 286 return Ok(DirstateMap::empty(on_disk));
284 287 }
285 288 let root = read_root(on_disk)?;
289 let mut unreachable_bytes = root.unreachable_bytes.get();
290 // Each append writes a new `Root`, so it’s never reused
291 unreachable_bytes += std::mem::size_of::<Root>() as u32;
286 292 let dirstate_map = DirstateMap {
287 293 on_disk,
288 294 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
@@ -292,6 +298,7 b" pub(super) fn read<'on_disk>("
292 298 nodes_with_entry_count: root.nodes_with_entry_count.get(),
293 299 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
294 300 ignore_patterns_hash: root.ignore_patterns_hash,
301 unreachable_bytes,
295 302 };
296 303 Ok(dirstate_map)
297 304 }
@@ -573,6 +580,7 b' pub(super) fn write('
573 580 nodes_with_copy_source_count: dirstate_map
574 581 .nodes_with_copy_source_count
575 582 .into(),
583 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
576 584 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
577 585 };
578 586 writer.out.extend(root.as_bytes());
General Comments 0
You need to be logged in to leave comments. Login now