##// END OF EJS Templates
dirstate-v2: shrink on-disk path lengths to 16-bits...
Simon Sapin -
r48477:da1c0cd6 default
parent child Browse files
Show More
@@ -26,7 +26,7 b' use crate::DirstateEntry;'
26 26 use crate::DirstateError;
27 27 use crate::DirstateParents;
28 28 use crate::EntryState;
29 use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
30 30 use bytes_cast::BytesCast;
31 31 use format_bytes::format_bytes;
32 32 use std::borrow::Cow;
@@ -54,7 +54,10 b' struct DocketHeader {'
54 54 marker: [u8; V2_FORMAT_MARKER.len()],
55 55 parent_1: [u8; STORED_NODE_ID_BYTES],
56 56 parent_2: [u8; STORED_NODE_ID_BYTES],
57
58 /// Counted in bytes
57 59 data_size: Size,
60
58 61 uuid_size: u8,
59 62 }
60 63
@@ -98,7 +101,7 b' pub(super) struct Node {'
98 101 full_path: PathSlice,
99 102
100 103 /// In bytes from `self.full_path.start`
101 base_name_start: Size,
104 base_name_start: PathSize,
102 105
103 106 copy_source: OptPathSlice,
104 107 children: ChildNodes,
@@ -167,20 +170,13 b' type Offset = U32Be;'
167 170
168 171 /// Counted in number of items
169 172 ///
170 /// NOTE: not supporting directories with more than 4 billion direct children,
171 /// or filenames more than 4 GiB.
173 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
172 174 type Size = U32Be;
173 175
174 /// Location of consecutive, fixed-size items.
176 /// Counted in bytes
175 177 ///
176 /// An item can be a single byte for paths, or a struct with
177 /// `derive(BytesCast)`.
178 #[derive(BytesCast, Copy, Clone)]
179 #[repr(C)]
180 struct Slice {
181 start: Offset,
182 len: Size,
183 }
178 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
179 type PathSize = U16Be;
184 180
185 181 /// A contiguous sequence of `len` times `Node`, representing the child nodes
186 182 /// of either some other node or of the repository root.
@@ -188,19 +184,29 b' struct Slice {'
188 184 /// Always sorted by ascending `full_path`, to allow binary search.
189 185 /// Since nodes with the same parent nodes also have the same parent path,
190 186 /// only the `base_name`s need to be compared during binary search.
191 type ChildNodes = Slice;
187 #[derive(BytesCast, Copy, Clone)]
188 #[repr(C)]
189 struct ChildNodes {
190 start: Offset,
191 len: Size,
192 }
192 193
193 194 /// A `HgPath` of `len` bytes
194 type PathSlice = Slice;
195 #[derive(BytesCast, Copy, Clone)]
196 #[repr(C)]
197 struct PathSlice {
198 start: Offset,
199 len: PathSize,
200 }
195 201
196 202 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
197 type OptPathSlice = Slice;
203 type OptPathSlice = PathSlice;
198 204
199 205 /// Make sure that size-affecting changes are made knowingly
200 206 fn _static_assert_size_of() {
201 207 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
202 208 let _ = std::mem::transmute::<Root, [u8; 36]>;
203 let _ = std::mem::transmute::<Node, [u8; 49]>;
209 let _ = std::mem::transmute::<Node, [u8; 43]>;
204 210 }
205 211
206 212 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
@@ -279,7 +285,7 b" pub(super) fn read<'on_disk>("
279 285 let root = read_root(on_disk)?;
280 286 let dirstate_map = DirstateMap {
281 287 on_disk,
282 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
288 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
283 289 on_disk,
284 290 root.root_nodes,
285 291 )?),
@@ -408,7 +414,7 b' impl Node {'
408 414 &self,
409 415 on_disk: &'on_disk [u8],
410 416 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
411 read_slice::<Node>(on_disk, self.children)
417 read_nodes(on_disk, self.children)
412 418 }
413 419
414 420 pub(super) fn to_in_memory_node<'on_disk>(
@@ -483,23 +489,31 b" impl From<&'_ Timestamp> for SystemTime "
483 489
484 490 fn read_hg_path(
485 491 on_disk: &[u8],
486 slice: Slice,
492 slice: PathSlice,
487 493 ) -> Result<&HgPath, DirstateV2ParseError> {
488 let bytes = read_slice::<u8>(on_disk, slice)?;
489 Ok(HgPath::new(bytes))
494 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
490 495 }
491 496
492 fn read_slice<T>(
497 fn read_nodes(
493 498 on_disk: &[u8],
494 slice: Slice,
499 slice: ChildNodes,
500 ) -> Result<&[Node], DirstateV2ParseError> {
501 read_slice(on_disk, slice.start, slice.len.get())
502 }
503
504 fn read_slice<T, Len>(
505 on_disk: &[u8],
506 start: Offset,
507 len: Len,
495 508 ) -> Result<&[T], DirstateV2ParseError>
496 509 where
497 510 T: BytesCast,
511 Len: TryInto<usize>,
498 512 {
499 513 // Either `usize::MAX` would result in "out of bounds" error since a single
500 514 // `&[u8]` cannot occupy the entire addess space.
501 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
502 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
515 let start = start.get().try_into().unwrap_or(std::usize::MAX);
516 let len = len.try_into().unwrap_or(std::usize::MAX);
503 517 on_disk
504 518 .get(start..)
505 519 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
@@ -514,10 +528,10 b" pub(crate) fn for_each_tracked_path<'on_"
514 528 let root = read_root(on_disk)?;
515 529 fn recur<'on_disk>(
516 530 on_disk: &'on_disk [u8],
517 nodes: Slice,
531 nodes: ChildNodes,
518 532 f: &mut impl FnMut(&'on_disk HgPath),
519 533 ) -> Result<(), DirstateV2ParseError> {
520 for node in read_slice::<Node>(on_disk, nodes)? {
534 for node in read_nodes(on_disk, nodes)? {
521 535 if let Some(state) = node.state()? {
522 536 if state.is_tracked() {
523 537 f(node.full_path(on_disk)?)
@@ -565,9 +579,10 b' fn write_nodes('
565 579 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
566 580 // order. Sort to enable binary search in the written file.
567 581 let nodes = nodes.sorted();
582 let nodes_len = nodes.len();
568 583
569 584 // First accumulate serialized nodes in a `Vec`
570 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
585 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
571 586 for node in nodes {
572 587 let children = write_nodes(
573 588 dirstate_map,
@@ -575,12 +590,12 b' fn write_nodes('
575 590 out,
576 591 )?;
577 592 let full_path = node.full_path(dirstate_map.on_disk)?;
578 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
593 let full_path = write_path(full_path.as_bytes(), out);
579 594 let copy_source =
580 595 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
581 write_slice::<u8>(source.as_bytes(), out)
596 write_path(source.as_bytes(), out)
582 597 } else {
583 Slice {
598 PathSlice {
584 599 start: 0.into(),
585 600 len: 0.into(),
586 601 }
@@ -612,9 +627,9 b' fn write_nodes('
612 627 children,
613 628 copy_source,
614 629 full_path,
615 base_name_start: u32::try_from(path.base_name_start())
616 // Could only panic for paths over 4 GiB
617 .expect("dirstate-v2 offset overflow")
630 base_name_start: u16::try_from(path.base_name_start())
631 // Could only panic for paths over 64 KiB
632 .expect("dirstate-v2 path length overflow")
618 633 .into(),
619 634 descendants_with_entry_count: node
620 635 .descendants_with_entry_count
@@ -634,23 +649,30 b' fn write_nodes('
634 649 },
635 650 })
636 651 }
637 // … so we can write them contiguously
638 Ok(write_slice::<Node>(&on_disk_nodes, out))
652 // … so we can write them contiguously, after writing everything else they
653 // refer to.
654 let start = current_offset(out);
655 let len = u32::try_from(nodes_len)
656 // Could only panic with over 4 billion nodes
657 .expect("dirstate-v2 path length overflow")
658 .into();
659 out.extend(on_disk_nodes.as_bytes());
660 Ok(ChildNodes { start, len })
639 661 }
640 662
641 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
642 where
643 T: BytesCast,
644 {
645 let start = u32::try_from(out.len())
663 fn current_offset(out: &Vec<u8>) -> Offset {
664 u32::try_from(out.len())
646 665 // Could only panic for a dirstate file larger than 4 GiB
647 666 .expect("dirstate-v2 offset overflow")
648 .into();
649 let len = u32::try_from(slice.len())
650 // Could only panic for paths over 4 GiB or nodes with over 4 billions
651 // child nodes
652 .expect("dirstate-v2 offset overflow")
667 .into()
668 }
669
670 fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice {
671 let start = current_offset(out);
672 let len = u16::try_from(slice.len())
673 // Could only panic for paths over 64 KiB
674 .expect("dirstate-v2 path length overflow")
653 675 .into();
654 676 out.extend(slice.as_bytes());
655 Slice { start, len }
677 PathSlice { start, len }
656 678 }
General Comments 0
You need to be logged in to leave comments. Login now