Show More
@@ -29,6 +29,10 b' use crate::StateMapIter;' | |||||
29 | use crate::StatusError; |
|
29 | use crate::StatusError; | |
30 | use crate::StatusOptions; |
|
30 | use crate::StatusOptions; | |
31 |
|
31 | |||
|
32 | /// Append to an existing data file if the amount of unreachable data (not used | |||
|
33 | /// anymore) is less than this fraction of the total amount of existing data. | |||
|
34 | const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5; | |||
|
35 | ||||
32 | pub struct DirstateMap<'on_disk> { |
|
36 | pub struct DirstateMap<'on_disk> { | |
33 | /// Contents of the `.hg/dirstate` file |
|
37 | /// Contents of the `.hg/dirstate` file | |
34 | pub(super) on_disk: &'on_disk [u8], |
|
38 | pub(super) on_disk: &'on_disk [u8], | |
@@ -44,6 +48,9 b" pub struct DirstateMap<'on_disk> {" | |||||
44 |
|
48 | |||
45 | /// See on_disk::Header |
|
49 | /// See on_disk::Header | |
46 | pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash, |
|
50 | pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash, | |
|
51 | ||||
|
52 | /// How many bytes of `on_disk` are not used anymore | |||
|
53 | pub(super) unreachable_bytes: u32, | |||
47 | } |
|
54 | } | |
48 |
|
55 | |||
49 | /// Using a plain `HgPathBuf` of the full path from the repository root as a |
|
56 | /// Using a plain `HgPathBuf` of the full path from the repository root as a | |
@@ -118,9 +125,10 b" impl<'on_disk> ChildNodes<'on_disk> {" | |||||
118 | } |
|
125 | } | |
119 | } |
|
126 | } | |
120 |
|
127 | |||
121 |
|
|
128 | fn make_mut( | |
122 | &mut self, |
|
129 | &mut self, | |
123 | on_disk: &'on_disk [u8], |
|
130 | on_disk: &'on_disk [u8], | |
|
131 | unreachable_bytes: &mut u32, | |||
124 | ) -> Result< |
|
132 | ) -> Result< | |
125 | &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>, |
|
133 | &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>, | |
126 | DirstateV2ParseError, |
|
134 | DirstateV2ParseError, | |
@@ -128,6 +136,8 b" impl<'on_disk> ChildNodes<'on_disk> {" | |||||
128 | match self { |
|
136 | match self { | |
129 | ChildNodes::InMemory(nodes) => Ok(nodes), |
|
137 | ChildNodes::InMemory(nodes) => Ok(nodes), | |
130 | ChildNodes::OnDisk(nodes) => { |
|
138 | ChildNodes::OnDisk(nodes) => { | |
|
139 | *unreachable_bytes += | |||
|
140 | std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32; | |||
131 | let nodes = nodes |
|
141 | let nodes = nodes | |
132 | .iter() |
|
142 | .iter() | |
133 | .map(|node| { |
|
143 | .map(|node| { | |
@@ -406,6 +416,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
406 | nodes_with_entry_count: 0, |
|
416 | nodes_with_entry_count: 0, | |
407 | nodes_with_copy_source_count: 0, |
|
417 | nodes_with_copy_source_count: 0, | |
408 | ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN], |
|
418 | ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN], | |
|
419 | unreachable_bytes: 0, | |||
409 | } |
|
420 | } | |
410 | } |
|
421 | } | |
411 |
|
422 | |||
@@ -436,6 +447,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
436 | let tracked = entry.state.is_tracked(); |
|
447 | let tracked = entry.state.is_tracked(); | |
437 | let node = Self::get_or_insert_node( |
|
448 | let node = Self::get_or_insert_node( | |
438 | map.on_disk, |
|
449 | map.on_disk, | |
|
450 | &mut map.unreachable_bytes, | |||
439 | &mut map.root, |
|
451 | &mut map.root, | |
440 | path, |
|
452 | path, | |
441 | WithBasename::to_cow_borrowed, |
|
453 | WithBasename::to_cow_borrowed, | |
@@ -472,18 +484,8 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
472 | /// append to the existing data file that contains `self.on_disk` (true), |
|
484 | /// append to the existing data file that contains `self.on_disk` (true), | |
473 | /// or create a new data file from scratch (false). |
|
485 | /// or create a new data file from scratch (false). | |
474 | pub(super) fn write_should_append(&self) -> bool { |
|
486 | pub(super) fn write_should_append(&self) -> bool { | |
475 | // Soon this will be a heuristic based on the amount of unreachable |
|
487 | let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32; | |
476 | // data. For now it’s pseudo-random in order to make tests exercise |
|
488 | ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO | |
477 | // both code paths. |
|
|||
478 |
|
||||
479 | fn bad_rng() -> u32 { |
|
|||
480 | std::time::SystemTime::now() |
|
|||
481 | .duration_since(std::time::UNIX_EPOCH) |
|
|||
482 | .unwrap() |
|
|||
483 | .subsec_millis() |
|
|||
484 | } |
|
|||
485 |
|
||||
486 | bad_rng() % 2 == 0 |
|
|||
487 | } |
|
489 | } | |
488 |
|
490 | |||
489 | fn get_node<'tree>( |
|
491 | fn get_node<'tree>( | |
@@ -514,6 +516,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
514 | /// other fields while the returned borrow is still valid |
|
516 | /// other fields while the returned borrow is still valid | |
515 | fn get_node_mut<'tree>( |
|
517 | fn get_node_mut<'tree>( | |
516 | on_disk: &'on_disk [u8], |
|
518 | on_disk: &'on_disk [u8], | |
|
519 | unreachable_bytes: &mut u32, | |||
517 | root: &'tree mut ChildNodes<'on_disk>, |
|
520 | root: &'tree mut ChildNodes<'on_disk>, | |
518 | path: &HgPath, |
|
521 | path: &HgPath, | |
519 | ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> { |
|
522 | ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> { | |
@@ -522,7 +525,9 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
522 | let mut component = |
|
525 | let mut component = | |
523 | components.next().expect("expected at least one components"); |
|
526 | components.next().expect("expected at least one components"); | |
524 | loop { |
|
527 | loop { | |
525 |
if let Some(child) = children |
|
528 | if let Some(child) = children | |
|
529 | .make_mut(on_disk, unreachable_bytes)? | |||
|
530 | .get_mut(component) | |||
526 | { |
|
531 | { | |
527 | if let Some(next_component) = components.next() { |
|
532 | if let Some(next_component) = components.next() { | |
528 | component = next_component; |
|
533 | component = next_component; | |
@@ -542,6 +547,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
542 | ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> { |
|
547 | ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> { | |
543 | Self::get_or_insert_node( |
|
548 | Self::get_or_insert_node( | |
544 | self.on_disk, |
|
549 | self.on_disk, | |
|
550 | &mut self.unreachable_bytes, | |||
545 | &mut self.root, |
|
551 | &mut self.root, | |
546 | path, |
|
552 | path, | |
547 | WithBasename::to_cow_owned, |
|
553 | WithBasename::to_cow_owned, | |
@@ -549,8 +555,9 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
549 | ) |
|
555 | ) | |
550 | } |
|
556 | } | |
551 |
|
557 | |||
552 |
|
|
558 | fn get_or_insert_node<'tree, 'path>( | |
553 | on_disk: &'on_disk [u8], |
|
559 | on_disk: &'on_disk [u8], | |
|
560 | unreachable_bytes: &mut u32, | |||
554 | root: &'tree mut ChildNodes<'on_disk>, |
|
561 | root: &'tree mut ChildNodes<'on_disk>, | |
555 | path: &'path HgPath, |
|
562 | path: &'path HgPath, | |
556 | to_cow: impl Fn( |
|
563 | to_cow: impl Fn( | |
@@ -569,7 +576,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
569 | // map already contains that key, without introducing double |
|
576 | // map already contains that key, without introducing double | |
570 | // lookup? |
|
577 | // lookup? | |
571 | let child_node = child_nodes |
|
578 | let child_node = child_nodes | |
572 | .make_mut(on_disk)? |
|
579 | .make_mut(on_disk, unreachable_bytes)? | |
573 | .entry(to_cow(ancestor_path)) |
|
580 | .entry(to_cow(ancestor_path)) | |
574 | .or_default(); |
|
581 | .or_default(); | |
575 | if let Some(next) = inclusive_ancestor_paths.next() { |
|
582 | if let Some(next) = inclusive_ancestor_paths.next() { | |
@@ -598,6 +605,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
598 |
|
605 | |||
599 | let node = Self::get_or_insert_node( |
|
606 | let node = Self::get_or_insert_node( | |
600 | self.on_disk, |
|
607 | self.on_disk, | |
|
608 | &mut self.unreachable_bytes, | |||
601 | &mut self.root, |
|
609 | &mut self.root, | |
602 | path, |
|
610 | path, | |
603 | WithBasename::to_cow_owned, |
|
611 | WithBasename::to_cow_owned, | |
@@ -681,6 +689,7 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
681 | for path in paths { |
|
689 | for path in paths { | |
682 | if let Some(node) = Self::get_node_mut( |
|
690 | if let Some(node) = Self::get_node_mut( | |
683 | self.on_disk, |
|
691 | self.on_disk, | |
|
692 | &mut self.unreachable_bytes, | |||
684 | &mut self.root, |
|
693 | &mut self.root, | |
685 | path.as_ref(), |
|
694 | path.as_ref(), | |
686 | )? { |
|
695 | )? { | |
@@ -712,6 +721,12 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
712 | Ok(None) |
|
721 | Ok(None) | |
713 | }) |
|
722 | }) | |
714 | } |
|
723 | } | |
|
724 | ||||
|
725 | fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) { | |||
|
726 | if let Cow::Borrowed(path) = path { | |||
|
727 | *unreachable_bytes += path.len() as u32 | |||
|
728 | } | |||
|
729 | } | |||
715 | } |
|
730 | } | |
716 |
|
731 | |||
717 | /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s. |
|
732 | /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s. | |
@@ -844,13 +859,14 b" impl<'on_disk> super::dispatch::Dirstate" | |||||
844 | /// removed a node in `nodes`. |
|
859 | /// removed a node in `nodes`. | |
845 | fn recur<'on_disk>( |
|
860 | fn recur<'on_disk>( | |
846 | on_disk: &'on_disk [u8], |
|
861 | on_disk: &'on_disk [u8], | |
|
862 | unreachable_bytes: &mut u32, | |||
847 | nodes: &mut ChildNodes<'on_disk>, |
|
863 | nodes: &mut ChildNodes<'on_disk>, | |
848 | path: &HgPath, |
|
864 | path: &HgPath, | |
849 | ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> { |
|
865 | ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> { | |
850 | let (first_path_component, rest_of_path) = |
|
866 | let (first_path_component, rest_of_path) = | |
851 | path.split_first_component(); |
|
867 | path.split_first_component(); | |
852 | let node = if let Some(node) = |
|
868 | let nodes = nodes.make_mut(on_disk, unreachable_bytes)?; | |
853 |
nodes |
|
869 | let node = if let Some(node) = nodes.get_mut(first_path_component) | |
854 | { |
|
870 | { | |
855 | node |
|
871 | node | |
856 | } else { |
|
872 | } else { | |
@@ -858,9 +874,12 b" impl<'on_disk> super::dispatch::Dirstate" | |||||
858 | }; |
|
874 | }; | |
859 | let dropped; |
|
875 | let dropped; | |
860 | if let Some(rest) = rest_of_path { |
|
876 | if let Some(rest) = rest_of_path { | |
861 | if let Some((d, removed)) = |
|
877 | if let Some((d, removed)) = recur( | |
862 |
|
|
878 | on_disk, | |
863 | { |
|
879 | unreachable_bytes, | |
|
880 | &mut node.children, | |||
|
881 | rest, | |||
|
882 | )? { | |||
864 | dropped = d; |
|
883 | dropped = d; | |
865 | if dropped.had_entry { |
|
884 | if dropped.had_entry { | |
866 | node.descendants_with_entry_count -= 1; |
|
885 | node.descendants_with_entry_count -= 1; | |
@@ -884,6 +903,9 b" impl<'on_disk> super::dispatch::Dirstate" | |||||
884 | if had_entry { |
|
903 | if had_entry { | |
885 | node.data = NodeData::None |
|
904 | node.data = NodeData::None | |
886 | } |
|
905 | } | |
|
906 | if let Some(source) = &node.copy_source { | |||
|
907 | DirstateMap::count_dropped_path(unreachable_bytes, source) | |||
|
908 | } | |||
887 | dropped = Dropped { |
|
909 | dropped = Dropped { | |
888 | was_tracked: node |
|
910 | was_tracked: node | |
889 | .data |
|
911 | .data | |
@@ -899,14 +921,22 b" impl<'on_disk> super::dispatch::Dirstate" | |||||
899 | && node.copy_source.is_none() |
|
921 | && node.copy_source.is_none() | |
900 | && node.children.is_empty(); |
|
922 | && node.children.is_empty(); | |
901 | if remove { |
|
923 | if remove { | |
902 | nodes.make_mut(on_disk)?.remove(first_path_component); |
|
924 | let (key, _) = | |
|
925 | nodes.remove_entry(first_path_component).unwrap(); | |||
|
926 | DirstateMap::count_dropped_path( | |||
|
927 | unreachable_bytes, | |||
|
928 | key.full_path(), | |||
|
929 | ) | |||
903 | } |
|
930 | } | |
904 | Ok(Some((dropped, remove))) |
|
931 | Ok(Some((dropped, remove))) | |
905 | } |
|
932 | } | |
906 |
|
933 | |||
907 | if let Some((dropped, _removed)) = |
|
934 | if let Some((dropped, _removed)) = recur( | |
908 |
|
|
935 | self.on_disk, | |
909 | { |
|
936 | &mut self.unreachable_bytes, | |
|
937 | &mut self.root, | |||
|
938 | filename, | |||
|
939 | )? { | |||
910 | if dropped.had_entry { |
|
940 | if dropped.had_entry { | |
911 | self.nodes_with_entry_count -= 1 |
|
941 | self.nodes_with_entry_count -= 1 | |
912 | } |
|
942 | } | |
@@ -926,9 +956,12 b" impl<'on_disk> super::dispatch::Dirstate" | |||||
926 | now: i32, |
|
956 | now: i32, | |
927 | ) -> Result<(), DirstateV2ParseError> { |
|
957 | ) -> Result<(), DirstateV2ParseError> { | |
928 | for filename in filenames { |
|
958 | for filename in filenames { | |
929 | if let Some(node) = |
|
959 | if let Some(node) = Self::get_node_mut( | |
930 | Self::get_node_mut(self.on_disk, &mut self.root, &filename)? |
|
960 | self.on_disk, | |
931 | { |
|
961 | &mut self.unreachable_bytes, | |
|
962 | &mut self.root, | |||
|
963 | &filename, | |||
|
964 | )? { | |||
932 | if let NodeData::Entry(entry) = &mut node.data { |
|
965 | if let NodeData::Entry(entry) = &mut node.data { | |
933 | entry.clear_ambiguous_mtime(now); |
|
966 | entry.clear_ambiguous_mtime(now); | |
934 | } |
|
967 | } | |
@@ -1144,16 +1177,20 b" impl<'on_disk> super::dispatch::Dirstate" | |||||
1144 | key: &HgPath, |
|
1177 | key: &HgPath, | |
1145 | ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { |
|
1178 | ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { | |
1146 | let count = &mut self.nodes_with_copy_source_count; |
|
1179 | let count = &mut self.nodes_with_copy_source_count; | |
1147 | Ok( |
|
1180 | let unreachable_bytes = &mut self.unreachable_bytes; | |
1148 | Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then( |
|
1181 | Ok(Self::get_node_mut( | |
1149 | |node| { |
|
1182 | self.on_disk, | |
1150 | if node.copy_source.is_some() { |
|
1183 | unreachable_bytes, | |
1151 | *count -= 1 |
|
1184 | &mut self.root, | |
|
1185 | key, | |||
|
1186 | )? | |||
|
1187 | .and_then(|node| { | |||
|
1188 | if let Some(source) = &node.copy_source { | |||
|
1189 | *count -= 1; | |||
|
1190 | Self::count_dropped_path(unreachable_bytes, source); | |||
1152 |
|
|
1191 | } | |
1153 |
|
|
1192 | node.copy_source.take().map(Cow::into_owned) | |
1154 |
|
|
1193 | })) | |
1155 | ), |
|
|||
1156 | ) |
|
|||
1157 | } |
|
1194 | } | |
1158 |
|
1195 | |||
1159 | fn copy_map_insert( |
|
1196 | fn copy_map_insert( | |
@@ -1163,6 +1200,7 b" impl<'on_disk> super::dispatch::Dirstate" | |||||
1163 | ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { |
|
1200 | ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> { | |
1164 | let node = Self::get_or_insert_node( |
|
1201 | let node = Self::get_or_insert_node( | |
1165 | self.on_disk, |
|
1202 | self.on_disk, | |
|
1203 | &mut self.unreachable_bytes, | |||
1166 | &mut self.root, |
|
1204 | &mut self.root, | |
1167 | &key, |
|
1205 | &key, | |
1168 | WithBasename::to_cow_owned, |
|
1206 | WithBasename::to_cow_owned, |
@@ -73,6 +73,9 b' struct Root {' | |||||
73 | nodes_with_entry_count: Size, |
|
73 | nodes_with_entry_count: Size, | |
74 | nodes_with_copy_source_count: Size, |
|
74 | nodes_with_copy_source_count: Size, | |
75 |
|
75 | |||
|
76 | /// How many bytes of this data file are not used anymore | |||
|
77 | unreachable_bytes: Size, | |||
|
78 | ||||
76 | /// If non-zero, a hash of ignore files that were used for some previous |
|
79 | /// If non-zero, a hash of ignore files that were used for some previous | |
77 | /// run of the `status` algorithm. |
|
80 | /// run of the `status` algorithm. | |
78 | /// |
|
81 | /// | |
@@ -205,7 +208,7 b' type OptPathSlice = PathSlice;' | |||||
205 | /// Make sure that size-affecting changes are made knowingly |
|
208 | /// Make sure that size-affecting changes are made knowingly | |
206 | fn _static_assert_size_of() { |
|
209 | fn _static_assert_size_of() { | |
207 | let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; |
|
210 | let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; | |
208 |
let _ = std::mem::transmute::<Root, [u8; |
|
211 | let _ = std::mem::transmute::<Root, [u8; 40]>; | |
209 | let _ = std::mem::transmute::<Node, [u8; 43]>; |
|
212 | let _ = std::mem::transmute::<Node, [u8; 43]>; | |
210 | } |
|
213 | } | |
211 |
|
214 | |||
@@ -283,6 +286,9 b" pub(super) fn read<'on_disk>(" | |||||
283 | return Ok(DirstateMap::empty(on_disk)); |
|
286 | return Ok(DirstateMap::empty(on_disk)); | |
284 | } |
|
287 | } | |
285 | let root = read_root(on_disk)?; |
|
288 | let root = read_root(on_disk)?; | |
|
289 | let mut unreachable_bytes = root.unreachable_bytes.get(); | |||
|
290 | // Each append writes a new `Root`, so it’s never reused | |||
|
291 | unreachable_bytes += std::mem::size_of::<Root>() as u32; | |||
286 | let dirstate_map = DirstateMap { |
|
292 | let dirstate_map = DirstateMap { | |
287 | on_disk, |
|
293 | on_disk, | |
288 | root: dirstate_map::ChildNodes::OnDisk(read_nodes( |
|
294 | root: dirstate_map::ChildNodes::OnDisk(read_nodes( | |
@@ -292,6 +298,7 b" pub(super) fn read<'on_disk>(" | |||||
292 | nodes_with_entry_count: root.nodes_with_entry_count.get(), |
|
298 | nodes_with_entry_count: root.nodes_with_entry_count.get(), | |
293 | nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(), |
|
299 | nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(), | |
294 | ignore_patterns_hash: root.ignore_patterns_hash, |
|
300 | ignore_patterns_hash: root.ignore_patterns_hash, | |
|
301 | unreachable_bytes, | |||
295 | }; |
|
302 | }; | |
296 | Ok(dirstate_map) |
|
303 | Ok(dirstate_map) | |
297 | } |
|
304 | } | |
@@ -573,6 +580,7 b' pub(super) fn write(' | |||||
573 | nodes_with_copy_source_count: dirstate_map |
|
580 | nodes_with_copy_source_count: dirstate_map | |
574 | .nodes_with_copy_source_count |
|
581 | .nodes_with_copy_source_count | |
575 | .into(), |
|
582 | .into(), | |
|
583 | unreachable_bytes: dirstate_map.unreachable_bytes.into(), | |||
576 | ignore_patterns_hash: dirstate_map.ignore_patterns_hash, |
|
584 | ignore_patterns_hash: dirstate_map.ignore_patterns_hash, | |
577 | }; |
|
585 | }; | |
578 | writer.out.extend(root.as_bytes()); |
|
586 | writer.out.extend(root.as_bytes()); |
General Comments 0
You need to be logged in to leave comments.
Login now