##// END OF EJS Templates
dirstate-v2: shrink on-disk path lengths to 16-bits...
Simon Sapin -
r48477:da1c0cd6 default
parent child Browse files
Show More
@@ -26,7 +26,7 use crate::DirstateEntry;
26 use crate::DirstateError;
26 use crate::DirstateError;
27 use crate::DirstateParents;
27 use crate::DirstateParents;
28 use crate::EntryState;
28 use crate::EntryState;
29 use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
30 use bytes_cast::BytesCast;
30 use bytes_cast::BytesCast;
31 use format_bytes::format_bytes;
31 use format_bytes::format_bytes;
32 use std::borrow::Cow;
32 use std::borrow::Cow;
@@ -54,7 +54,10 struct DocketHeader {
54 marker: [u8; V2_FORMAT_MARKER.len()],
54 marker: [u8; V2_FORMAT_MARKER.len()],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
57
58 /// Counted in bytes
57 data_size: Size,
59 data_size: Size,
60
58 uuid_size: u8,
61 uuid_size: u8,
59 }
62 }
60
63
@@ -98,7 +101,7 pub(super) struct Node {
98 full_path: PathSlice,
101 full_path: PathSlice,
99
102
100 /// In bytes from `self.full_path.start`
103 /// In bytes from `self.full_path.start`
101 base_name_start: Size,
104 base_name_start: PathSize,
102
105
103 copy_source: OptPathSlice,
106 copy_source: OptPathSlice,
104 children: ChildNodes,
107 children: ChildNodes,
@@ -167,20 +170,13 type Offset = U32Be;
167
170
168 /// Counted in number of items
171 /// Counted in number of items
169 ///
172 ///
170 /// NOTE: not supporting directories with more than 4 billion direct children,
173 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
171 /// or filenames more than 4 GiB.
172 type Size = U32Be;
174 type Size = U32Be;
173
175
174 /// Location of consecutive, fixed-size items.
176 /// Counted in bytes
175 ///
177 ///
176 /// An item can be a single byte for paths, or a struct with
178 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
177 /// `derive(BytesCast)`.
179 type PathSize = U16Be;
178 #[derive(BytesCast, Copy, Clone)]
179 #[repr(C)]
180 struct Slice {
181 start: Offset,
182 len: Size,
183 }
184
180
185 /// A contiguous sequence of `len` times `Node`, representing the child nodes
181 /// A contiguous sequence of `len` times `Node`, representing the child nodes
186 /// of either some other node or of the repository root.
182 /// of either some other node or of the repository root.
@@ -188,19 +184,29 struct Slice {
188 /// Always sorted by ascending `full_path`, to allow binary search.
184 /// Always sorted by ascending `full_path`, to allow binary search.
189 /// Since nodes with the same parent nodes also have the same parent path,
185 /// Since nodes with the same parent nodes also have the same parent path,
190 /// only the `base_name`s need to be compared during binary search.
186 /// only the `base_name`s need to be compared during binary search.
191 type ChildNodes = Slice;
187 #[derive(BytesCast, Copy, Clone)]
188 #[repr(C)]
189 struct ChildNodes {
190 start: Offset,
191 len: Size,
192 }
192
193
193 /// A `HgPath` of `len` bytes
194 /// A `HgPath` of `len` bytes
194 type PathSlice = Slice;
195 #[derive(BytesCast, Copy, Clone)]
196 #[repr(C)]
197 struct PathSlice {
198 start: Offset,
199 len: PathSize,
200 }
195
201
196 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
202 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
197 type OptPathSlice = Slice;
203 type OptPathSlice = PathSlice;
198
204
199 /// Make sure that size-affecting changes are made knowingly
205 /// Make sure that size-affecting changes are made knowingly
200 fn _static_assert_size_of() {
206 fn _static_assert_size_of() {
201 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
207 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
202 let _ = std::mem::transmute::<Root, [u8; 36]>;
208 let _ = std::mem::transmute::<Root, [u8; 36]>;
203 let _ = std::mem::transmute::<Node, [u8; 49]>;
209 let _ = std::mem::transmute::<Node, [u8; 43]>;
204 }
210 }
205
211
206 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
212 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
@@ -279,7 +285,7 pub(super) fn read<'on_disk>(
279 let root = read_root(on_disk)?;
285 let root = read_root(on_disk)?;
280 let dirstate_map = DirstateMap {
286 let dirstate_map = DirstateMap {
281 on_disk,
287 on_disk,
282 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
288 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
283 on_disk,
289 on_disk,
284 root.root_nodes,
290 root.root_nodes,
285 )?),
291 )?),
@@ -408,7 +414,7 impl Node {
408 &self,
414 &self,
409 on_disk: &'on_disk [u8],
415 on_disk: &'on_disk [u8],
410 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
416 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
411 read_slice::<Node>(on_disk, self.children)
417 read_nodes(on_disk, self.children)
412 }
418 }
413
419
414 pub(super) fn to_in_memory_node<'on_disk>(
420 pub(super) fn to_in_memory_node<'on_disk>(
@@ -483,23 +489,31 impl From<&'_ Timestamp> for SystemTime
483
489
484 fn read_hg_path(
490 fn read_hg_path(
485 on_disk: &[u8],
491 on_disk: &[u8],
486 slice: Slice,
492 slice: PathSlice,
487 ) -> Result<&HgPath, DirstateV2ParseError> {
493 ) -> Result<&HgPath, DirstateV2ParseError> {
488 let bytes = read_slice::<u8>(on_disk, slice)?;
494 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
489 Ok(HgPath::new(bytes))
490 }
495 }
491
496
492 fn read_slice<T>(
497 fn read_nodes(
493 on_disk: &[u8],
498 on_disk: &[u8],
494 slice: Slice,
499 slice: ChildNodes,
500 ) -> Result<&[Node], DirstateV2ParseError> {
501 read_slice(on_disk, slice.start, slice.len.get())
502 }
503
504 fn read_slice<T, Len>(
505 on_disk: &[u8],
506 start: Offset,
507 len: Len,
495 ) -> Result<&[T], DirstateV2ParseError>
508 ) -> Result<&[T], DirstateV2ParseError>
496 where
509 where
497 T: BytesCast,
510 T: BytesCast,
511 Len: TryInto<usize>,
498 {
512 {
499 // Either `usize::MAX` would result in "out of bounds" error since a single
513 // Either `usize::MAX` would result in "out of bounds" error since a single
500 // `&[u8]` cannot occupy the entire addess space.
514 // `&[u8]` cannot occupy the entire addess space.
501 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
515 let start = start.get().try_into().unwrap_or(std::usize::MAX);
502 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
516 let len = len.try_into().unwrap_or(std::usize::MAX);
503 on_disk
517 on_disk
504 .get(start..)
518 .get(start..)
505 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
519 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
@@ -514,10 +528,10 pub(crate) fn for_each_tracked_path<'on_
514 let root = read_root(on_disk)?;
528 let root = read_root(on_disk)?;
515 fn recur<'on_disk>(
529 fn recur<'on_disk>(
516 on_disk: &'on_disk [u8],
530 on_disk: &'on_disk [u8],
517 nodes: Slice,
531 nodes: ChildNodes,
518 f: &mut impl FnMut(&'on_disk HgPath),
532 f: &mut impl FnMut(&'on_disk HgPath),
519 ) -> Result<(), DirstateV2ParseError> {
533 ) -> Result<(), DirstateV2ParseError> {
520 for node in read_slice::<Node>(on_disk, nodes)? {
534 for node in read_nodes(on_disk, nodes)? {
521 if let Some(state) = node.state()? {
535 if let Some(state) = node.state()? {
522 if state.is_tracked() {
536 if state.is_tracked() {
523 f(node.full_path(on_disk)?)
537 f(node.full_path(on_disk)?)
@@ -565,9 +579,10 fn write_nodes(
565 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
579 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
566 // order. Sort to enable binary search in the written file.
580 // order. Sort to enable binary search in the written file.
567 let nodes = nodes.sorted();
581 let nodes = nodes.sorted();
582 let nodes_len = nodes.len();
568
583
569 // First accumulate serialized nodes in a `Vec`
584 // First accumulate serialized nodes in a `Vec`
570 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
585 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
571 for node in nodes {
586 for node in nodes {
572 let children = write_nodes(
587 let children = write_nodes(
573 dirstate_map,
588 dirstate_map,
@@ -575,12 +590,12 fn write_nodes(
575 out,
590 out,
576 )?;
591 )?;
577 let full_path = node.full_path(dirstate_map.on_disk)?;
592 let full_path = node.full_path(dirstate_map.on_disk)?;
578 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
593 let full_path = write_path(full_path.as_bytes(), out);
579 let copy_source =
594 let copy_source =
580 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
595 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
581 write_slice::<u8>(source.as_bytes(), out)
596 write_path(source.as_bytes(), out)
582 } else {
597 } else {
583 Slice {
598 PathSlice {
584 start: 0.into(),
599 start: 0.into(),
585 len: 0.into(),
600 len: 0.into(),
586 }
601 }
@@ -612,9 +627,9 fn write_nodes(
612 children,
627 children,
613 copy_source,
628 copy_source,
614 full_path,
629 full_path,
615 base_name_start: u32::try_from(path.base_name_start())
630 base_name_start: u16::try_from(path.base_name_start())
616 // Could only panic for paths over 4 GiB
631 // Could only panic for paths over 64 KiB
617 .expect("dirstate-v2 offset overflow")
632 .expect("dirstate-v2 path length overflow")
618 .into(),
633 .into(),
619 descendants_with_entry_count: node
634 descendants_with_entry_count: node
620 .descendants_with_entry_count
635 .descendants_with_entry_count
@@ -634,23 +649,30 fn write_nodes(
634 },
649 },
635 })
650 })
636 }
651 }
637 // … so we can write them contiguously
652 // … so we can write them contiguously, after writing everything else they
638 Ok(write_slice::<Node>(&on_disk_nodes, out))
653 // refer to.
654 let start = current_offset(out);
655 let len = u32::try_from(nodes_len)
656 // Could only panic with over 4 billion nodes
657 .expect("dirstate-v2 path length overflow")
658 .into();
659 out.extend(on_disk_nodes.as_bytes());
660 Ok(ChildNodes { start, len })
639 }
661 }
640
662
641 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
663 fn current_offset(out: &Vec<u8>) -> Offset {
642 where
664 u32::try_from(out.len())
643 T: BytesCast,
644 {
645 let start = u32::try_from(out.len())
646 // Could only panic for a dirstate file larger than 4 GiB
665 // Could only panic for a dirstate file larger than 4 GiB
647 .expect("dirstate-v2 offset overflow")
666 .expect("dirstate-v2 offset overflow")
648 .into();
667 .into()
649 let len = u32::try_from(slice.len())
668 }
650 // Could only panic for paths over 4 GiB or nodes with over 4 billions
669
651 // child nodes
670 fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice {
652 .expect("dirstate-v2 offset overflow")
671 let start = current_offset(out);
672 let len = u16::try_from(slice.len())
673 // Could only panic for paths over 64 KiB
674 .expect("dirstate-v2 path length overflow")
653 .into();
675 .into();
654 out.extend(slice.as_bytes());
676 out.extend(slice.as_bytes());
655 Slice { start, len }
677 PathSlice { start, len }
656 }
678 }
General Comments 0
You need to be logged in to leave comments. Login now