##// END OF EJS Templates
dirstate-v2: Add heuristic for when to create a new data file...
Simon Sapin -
r48481:d9411836 default
parent child Browse files
Show More
@@ -29,6 +29,10 b' use crate::StateMapIter;'
29 use crate::StatusError;
29 use crate::StatusError;
30 use crate::StatusOptions;
30 use crate::StatusOptions;
31
31
32 /// Append to an existing data file if the amount of unreachable data (not used
33 /// anymore) is less than this fraction of the total amount of existing data.
34 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
35
32 pub struct DirstateMap<'on_disk> {
36 pub struct DirstateMap<'on_disk> {
33 /// Contents of the `.hg/dirstate` file
37 /// Contents of the `.hg/dirstate` file
34 pub(super) on_disk: &'on_disk [u8],
38 pub(super) on_disk: &'on_disk [u8],
@@ -44,6 +48,9 b" pub struct DirstateMap<'on_disk> {"
44
48
45 /// See on_disk::Header
49 /// See on_disk::Header
46 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
50 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
51
52 /// How many bytes of `on_disk` are not used anymore
53 pub(super) unreachable_bytes: u32,
47 }
54 }
48
55
49 /// Using a plain `HgPathBuf` of the full path from the repository root as a
56 /// Using a plain `HgPathBuf` of the full path from the repository root as a
@@ -118,9 +125,10 b" impl<'on_disk> ChildNodes<'on_disk> {"
118 }
125 }
119 }
126 }
120
127
121 pub(super) fn make_mut(
128 fn make_mut(
122 &mut self,
129 &mut self,
123 on_disk: &'on_disk [u8],
130 on_disk: &'on_disk [u8],
131 unreachable_bytes: &mut u32,
124 ) -> Result<
132 ) -> Result<
125 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
133 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
126 DirstateV2ParseError,
134 DirstateV2ParseError,
@@ -128,6 +136,8 b" impl<'on_disk> ChildNodes<'on_disk> {"
128 match self {
136 match self {
129 ChildNodes::InMemory(nodes) => Ok(nodes),
137 ChildNodes::InMemory(nodes) => Ok(nodes),
130 ChildNodes::OnDisk(nodes) => {
138 ChildNodes::OnDisk(nodes) => {
139 *unreachable_bytes +=
140 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
131 let nodes = nodes
141 let nodes = nodes
132 .iter()
142 .iter()
133 .map(|node| {
143 .map(|node| {
@@ -406,6 +416,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
406 nodes_with_entry_count: 0,
416 nodes_with_entry_count: 0,
407 nodes_with_copy_source_count: 0,
417 nodes_with_copy_source_count: 0,
408 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
418 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
419 unreachable_bytes: 0,
409 }
420 }
410 }
421 }
411
422
@@ -436,6 +447,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
436 let tracked = entry.state.is_tracked();
447 let tracked = entry.state.is_tracked();
437 let node = Self::get_or_insert_node(
448 let node = Self::get_or_insert_node(
438 map.on_disk,
449 map.on_disk,
450 &mut map.unreachable_bytes,
439 &mut map.root,
451 &mut map.root,
440 path,
452 path,
441 WithBasename::to_cow_borrowed,
453 WithBasename::to_cow_borrowed,
@@ -472,18 +484,8 b" impl<'on_disk> DirstateMap<'on_disk> {"
472 /// append to the existing data file that contains `self.on_disk` (true),
484 /// append to the existing data file that contains `self.on_disk` (true),
473 /// or create a new data file from scratch (false).
485 /// or create a new data file from scratch (false).
474 pub(super) fn write_should_append(&self) -> bool {
486 pub(super) fn write_should_append(&self) -> bool {
475 // Soon this will be a heuristic based on the amount of unreachable
487 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
476 // data. For now it’s pseudo-random in order to make tests exercise
488 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
477 // both code paths.
478
479 fn bad_rng() -> u32 {
480 std::time::SystemTime::now()
481 .duration_since(std::time::UNIX_EPOCH)
482 .unwrap()
483 .subsec_millis()
484 }
485
486 bad_rng() % 2 == 0
487 }
489 }
488
490
489 fn get_node<'tree>(
491 fn get_node<'tree>(
@@ -514,6 +516,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
514 /// other fields while the returned borrow is still valid
516 /// other fields while the returned borrow is still valid
515 fn get_node_mut<'tree>(
517 fn get_node_mut<'tree>(
516 on_disk: &'on_disk [u8],
518 on_disk: &'on_disk [u8],
519 unreachable_bytes: &mut u32,
517 root: &'tree mut ChildNodes<'on_disk>,
520 root: &'tree mut ChildNodes<'on_disk>,
518 path: &HgPath,
521 path: &HgPath,
519 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
522 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
@@ -522,7 +525,9 b" impl<'on_disk> DirstateMap<'on_disk> {"
522 let mut component =
525 let mut component =
523 components.next().expect("expected at least one components");
526 components.next().expect("expected at least one components");
524 loop {
527 loop {
525 if let Some(child) = children.make_mut(on_disk)?.get_mut(component)
528 if let Some(child) = children
529 .make_mut(on_disk, unreachable_bytes)?
530 .get_mut(component)
526 {
531 {
527 if let Some(next_component) = components.next() {
532 if let Some(next_component) = components.next() {
528 component = next_component;
533 component = next_component;
@@ -542,6 +547,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
542 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
547 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
543 Self::get_or_insert_node(
548 Self::get_or_insert_node(
544 self.on_disk,
549 self.on_disk,
550 &mut self.unreachable_bytes,
545 &mut self.root,
551 &mut self.root,
546 path,
552 path,
547 WithBasename::to_cow_owned,
553 WithBasename::to_cow_owned,
@@ -549,8 +555,9 b" impl<'on_disk> DirstateMap<'on_disk> {"
549 )
555 )
550 }
556 }
551
557
552 pub(super) fn get_or_insert_node<'tree, 'path>(
558 fn get_or_insert_node<'tree, 'path>(
553 on_disk: &'on_disk [u8],
559 on_disk: &'on_disk [u8],
560 unreachable_bytes: &mut u32,
554 root: &'tree mut ChildNodes<'on_disk>,
561 root: &'tree mut ChildNodes<'on_disk>,
555 path: &'path HgPath,
562 path: &'path HgPath,
556 to_cow: impl Fn(
563 to_cow: impl Fn(
@@ -569,7 +576,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
569 // map already contains that key, without introducing double
576 // map already contains that key, without introducing double
570 // lookup?
577 // lookup?
571 let child_node = child_nodes
578 let child_node = child_nodes
572 .make_mut(on_disk)?
579 .make_mut(on_disk, unreachable_bytes)?
573 .entry(to_cow(ancestor_path))
580 .entry(to_cow(ancestor_path))
574 .or_default();
581 .or_default();
575 if let Some(next) = inclusive_ancestor_paths.next() {
582 if let Some(next) = inclusive_ancestor_paths.next() {
@@ -598,6 +605,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
598
605
599 let node = Self::get_or_insert_node(
606 let node = Self::get_or_insert_node(
600 self.on_disk,
607 self.on_disk,
608 &mut self.unreachable_bytes,
601 &mut self.root,
609 &mut self.root,
602 path,
610 path,
603 WithBasename::to_cow_owned,
611 WithBasename::to_cow_owned,
@@ -681,6 +689,7 b" impl<'on_disk> DirstateMap<'on_disk> {"
681 for path in paths {
689 for path in paths {
682 if let Some(node) = Self::get_node_mut(
690 if let Some(node) = Self::get_node_mut(
683 self.on_disk,
691 self.on_disk,
692 &mut self.unreachable_bytes,
684 &mut self.root,
693 &mut self.root,
685 path.as_ref(),
694 path.as_ref(),
686 )? {
695 )? {
@@ -712,6 +721,12 b" impl<'on_disk> DirstateMap<'on_disk> {"
712 Ok(None)
721 Ok(None)
713 })
722 })
714 }
723 }
724
725 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
726 if let Cow::Borrowed(path) = path {
727 *unreachable_bytes += path.len() as u32
728 }
729 }
715 }
730 }
716
731
717 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
732 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
@@ -844,13 +859,14 b" impl<'on_disk> super::dispatch::Dirstate"
844 /// removed a node in `nodes`.
859 /// removed a node in `nodes`.
845 fn recur<'on_disk>(
860 fn recur<'on_disk>(
846 on_disk: &'on_disk [u8],
861 on_disk: &'on_disk [u8],
862 unreachable_bytes: &mut u32,
847 nodes: &mut ChildNodes<'on_disk>,
863 nodes: &mut ChildNodes<'on_disk>,
848 path: &HgPath,
864 path: &HgPath,
849 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
865 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
850 let (first_path_component, rest_of_path) =
866 let (first_path_component, rest_of_path) =
851 path.split_first_component();
867 path.split_first_component();
852 let node = if let Some(node) =
868 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
853 nodes.make_mut(on_disk)?.get_mut(first_path_component)
869 let node = if let Some(node) = nodes.get_mut(first_path_component)
854 {
870 {
855 node
871 node
856 } else {
872 } else {
@@ -858,9 +874,12 b" impl<'on_disk> super::dispatch::Dirstate"
858 };
874 };
859 let dropped;
875 let dropped;
860 if let Some(rest) = rest_of_path {
876 if let Some(rest) = rest_of_path {
861 if let Some((d, removed)) =
877 if let Some((d, removed)) = recur(
862 recur(on_disk, &mut node.children, rest)?
878 on_disk,
863 {
879 unreachable_bytes,
880 &mut node.children,
881 rest,
882 )? {
864 dropped = d;
883 dropped = d;
865 if dropped.had_entry {
884 if dropped.had_entry {
866 node.descendants_with_entry_count -= 1;
885 node.descendants_with_entry_count -= 1;
@@ -884,6 +903,9 b" impl<'on_disk> super::dispatch::Dirstate"
884 if had_entry {
903 if had_entry {
885 node.data = NodeData::None
904 node.data = NodeData::None
886 }
905 }
906 if let Some(source) = &node.copy_source {
907 DirstateMap::count_dropped_path(unreachable_bytes, source)
908 }
887 dropped = Dropped {
909 dropped = Dropped {
888 was_tracked: node
910 was_tracked: node
889 .data
911 .data
@@ -899,14 +921,22 b" impl<'on_disk> super::dispatch::Dirstate"
899 && node.copy_source.is_none()
921 && node.copy_source.is_none()
900 && node.children.is_empty();
922 && node.children.is_empty();
901 if remove {
923 if remove {
902 nodes.make_mut(on_disk)?.remove(first_path_component);
924 let (key, _) =
925 nodes.remove_entry(first_path_component).unwrap();
926 DirstateMap::count_dropped_path(
927 unreachable_bytes,
928 key.full_path(),
929 )
903 }
930 }
904 Ok(Some((dropped, remove)))
931 Ok(Some((dropped, remove)))
905 }
932 }
906
933
907 if let Some((dropped, _removed)) =
934 if let Some((dropped, _removed)) = recur(
908 recur(self.on_disk, &mut self.root, filename)?
935 self.on_disk,
909 {
936 &mut self.unreachable_bytes,
937 &mut self.root,
938 filename,
939 )? {
910 if dropped.had_entry {
940 if dropped.had_entry {
911 self.nodes_with_entry_count -= 1
941 self.nodes_with_entry_count -= 1
912 }
942 }
@@ -926,9 +956,12 b" impl<'on_disk> super::dispatch::Dirstate"
926 now: i32,
956 now: i32,
927 ) -> Result<(), DirstateV2ParseError> {
957 ) -> Result<(), DirstateV2ParseError> {
928 for filename in filenames {
958 for filename in filenames {
929 if let Some(node) =
959 if let Some(node) = Self::get_node_mut(
930 Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
960 self.on_disk,
931 {
961 &mut self.unreachable_bytes,
962 &mut self.root,
963 &filename,
964 )? {
932 if let NodeData::Entry(entry) = &mut node.data {
965 if let NodeData::Entry(entry) = &mut node.data {
933 entry.clear_ambiguous_mtime(now);
966 entry.clear_ambiguous_mtime(now);
934 }
967 }
@@ -1144,16 +1177,20 b" impl<'on_disk> super::dispatch::Dirstate"
1144 key: &HgPath,
1177 key: &HgPath,
1145 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1178 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1146 let count = &mut self.nodes_with_copy_source_count;
1179 let count = &mut self.nodes_with_copy_source_count;
1147 Ok(
1180 let unreachable_bytes = &mut self.unreachable_bytes;
1148 Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then(
1181 Ok(Self::get_node_mut(
1149 |node| {
1182 self.on_disk,
1150 if node.copy_source.is_some() {
1183 unreachable_bytes,
1151 *count -= 1
1184 &mut self.root,
1152 }
1185 key,
1153 node.copy_source.take().map(Cow::into_owned)
1186 )?
1154 },
1187 .and_then(|node| {
1155 ),
1188 if let Some(source) = &node.copy_source {
1156 )
1189 *count -= 1;
1190 Self::count_dropped_path(unreachable_bytes, source);
1191 }
1192 node.copy_source.take().map(Cow::into_owned)
1193 }))
1157 }
1194 }
1158
1195
1159 fn copy_map_insert(
1196 fn copy_map_insert(
@@ -1163,6 +1200,7 b" impl<'on_disk> super::dispatch::Dirstate"
1163 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1200 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1164 let node = Self::get_or_insert_node(
1201 let node = Self::get_or_insert_node(
1165 self.on_disk,
1202 self.on_disk,
1203 &mut self.unreachable_bytes,
1166 &mut self.root,
1204 &mut self.root,
1167 &key,
1205 &key,
1168 WithBasename::to_cow_owned,
1206 WithBasename::to_cow_owned,
@@ -73,6 +73,9 b' struct Root {'
73 nodes_with_entry_count: Size,
73 nodes_with_entry_count: Size,
74 nodes_with_copy_source_count: Size,
74 nodes_with_copy_source_count: Size,
75
75
76 /// How many bytes of this data file are not used anymore
77 unreachable_bytes: Size,
78
76 /// If non-zero, a hash of ignore files that were used for some previous
79 /// If non-zero, a hash of ignore files that were used for some previous
77 /// run of the `status` algorithm.
80 /// run of the `status` algorithm.
78 ///
81 ///
@@ -205,7 +208,7 b' type OptPathSlice = PathSlice;'
205 /// Make sure that size-affecting changes are made knowingly
208 /// Make sure that size-affecting changes are made knowingly
206 fn _static_assert_size_of() {
209 fn _static_assert_size_of() {
207 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
210 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
208 let _ = std::mem::transmute::<Root, [u8; 36]>;
211 let _ = std::mem::transmute::<Root, [u8; 40]>;
209 let _ = std::mem::transmute::<Node, [u8; 43]>;
212 let _ = std::mem::transmute::<Node, [u8; 43]>;
210 }
213 }
211
214
@@ -283,6 +286,9 b" pub(super) fn read<'on_disk>("
283 return Ok(DirstateMap::empty(on_disk));
286 return Ok(DirstateMap::empty(on_disk));
284 }
287 }
285 let root = read_root(on_disk)?;
288 let root = read_root(on_disk)?;
289 let mut unreachable_bytes = root.unreachable_bytes.get();
290 // Each append writes a new `Root`, so it’s never reused
291 unreachable_bytes += std::mem::size_of::<Root>() as u32;
286 let dirstate_map = DirstateMap {
292 let dirstate_map = DirstateMap {
287 on_disk,
293 on_disk,
288 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
294 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
@@ -292,6 +298,7 b" pub(super) fn read<'on_disk>("
292 nodes_with_entry_count: root.nodes_with_entry_count.get(),
298 nodes_with_entry_count: root.nodes_with_entry_count.get(),
293 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
299 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
294 ignore_patterns_hash: root.ignore_patterns_hash,
300 ignore_patterns_hash: root.ignore_patterns_hash,
301 unreachable_bytes,
295 };
302 };
296 Ok(dirstate_map)
303 Ok(dirstate_map)
297 }
304 }
@@ -573,6 +580,7 b' pub(super) fn write('
573 nodes_with_copy_source_count: dirstate_map
580 nodes_with_copy_source_count: dirstate_map
574 .nodes_with_copy_source_count
581 .nodes_with_copy_source_count
575 .into(),
582 .into(),
583 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
576 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
584 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
577 };
585 };
578 writer.out.extend(root.as_bytes());
586 writer.out.extend(root.as_bytes());
General Comments 0
You need to be logged in to leave comments. Login now