Show More
@@ -26,7 +26,7 use crate::DirstateEntry; | |||
|
26 | 26 | use crate::DirstateError; |
|
27 | 27 | use crate::DirstateParents; |
|
28 | 28 | use crate::EntryState; |
|
29 | use bytes_cast::unaligned::{I32Be, I64Be, U32Be}; | |
|
29 | use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be}; | |
|
30 | 30 | use bytes_cast::BytesCast; |
|
31 | 31 | use format_bytes::format_bytes; |
|
32 | 32 | use std::borrow::Cow; |
@@ -54,7 +54,10 struct DocketHeader { | |||
|
54 | 54 | marker: [u8; V2_FORMAT_MARKER.len()], |
|
55 | 55 | parent_1: [u8; STORED_NODE_ID_BYTES], |
|
56 | 56 | parent_2: [u8; STORED_NODE_ID_BYTES], |
|
57 | ||
|
58 | /// Counted in bytes | |
|
57 | 59 | data_size: Size, |
|
60 | ||
|
58 | 61 | uuid_size: u8, |
|
59 | 62 | } |
|
60 | 63 | |
@@ -98,7 +101,7 pub(super) struct Node { | |||
|
98 | 101 | full_path: PathSlice, |
|
99 | 102 | |
|
100 | 103 | /// In bytes from `self.full_path.start` |
|
101 | base_name_start: Size, | |
|
104 | base_name_start: PathSize, | |
|
102 | 105 | |
|
103 | 106 | copy_source: OptPathSlice, |
|
104 | 107 | children: ChildNodes, |
@@ -167,20 +170,13 type Offset = U32Be; | |||
|
167 | 170 | |
|
168 | 171 | /// Counted in number of items |
|
169 | 172 | /// |
|
170 |
/// NOTE: not support |
|
|
171 | /// or filenames more than 4 GiB. | |
|
173 | /// NOTE: we choose not to support counting more than 4 billion nodes anywhere. | |
|
172 | 174 | type Size = U32Be; |
|
173 | 175 | |
|
174 | /// Location of consecutive, fixed-size items. | |
|
176 | /// Counted in bytes | |
|
175 | 177 | /// |
|
176 | /// An item can be a single byte for paths, or a struct with | |
|
177 | /// `derive(BytesCast)`. | |
|
178 | #[derive(BytesCast, Copy, Clone)] | |
|
179 | #[repr(C)] | |
|
180 | struct Slice { | |
|
181 | start: Offset, | |
|
182 | len: Size, | |
|
183 | } | |
|
178 | /// NOTE: we choose not to support file names/paths longer than 64 KiB. | |
|
179 | type PathSize = U16Be; | |
|
184 | 180 | |
|
185 | 181 | /// A contiguous sequence of `len` times `Node`, representing the child nodes |
|
186 | 182 | /// of either some other node or of the repository root. |
@@ -188,19 +184,29 struct Slice { | |||
|
188 | 184 | /// Always sorted by ascending `full_path`, to allow binary search. |
|
189 | 185 | /// Since nodes with the same parent nodes also have the same parent path, |
|
190 | 186 | /// only the `base_name`s need to be compared during binary search. |
|
191 | type ChildNodes = Slice; | |
|
187 | #[derive(BytesCast, Copy, Clone)] | |
|
188 | #[repr(C)] | |
|
189 | struct ChildNodes { | |
|
190 | start: Offset, | |
|
191 | len: Size, | |
|
192 | } | |
|
192 | 193 | |
|
193 | 194 | /// A `HgPath` of `len` bytes |
|
194 | type PathSlice = Slice; | |
|
195 | #[derive(BytesCast, Copy, Clone)] | |
|
196 | #[repr(C)] | |
|
197 | struct PathSlice { | |
|
198 | start: Offset, | |
|
199 | len: PathSize, | |
|
200 | } | |
|
195 | 201 | |
|
196 | 202 | /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes |
|
197 | type OptPathSlice = Slice; | |
|
203 | type OptPathSlice = PathSlice; | |
|
198 | 204 | |
|
199 | 205 | /// Make sure that size-affecting changes are made knowingly |
|
200 | 206 | fn _static_assert_size_of() { |
|
201 | 207 | let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; |
|
202 | 208 | let _ = std::mem::transmute::<Root, [u8; 36]>; |
|
203 |
let _ = std::mem::transmute::<Node, [u8; 4 |
|
|
209 | let _ = std::mem::transmute::<Node, [u8; 43]>; | |
|
204 | 210 | } |
|
205 | 211 | |
|
206 | 212 | /// Unexpected file format found in `.hg/dirstate` with the "v2" format. |
@@ -279,7 +285,7 pub(super) fn read<'on_disk>( | |||
|
279 | 285 | let root = read_root(on_disk)?; |
|
280 | 286 | let dirstate_map = DirstateMap { |
|
281 | 287 | on_disk, |
|
282 |
root: dirstate_map::ChildNodes::OnDisk(read_ |
|
|
288 | root: dirstate_map::ChildNodes::OnDisk(read_nodes( | |
|
283 | 289 | on_disk, |
|
284 | 290 | root.root_nodes, |
|
285 | 291 | )?), |
@@ -408,7 +414,7 impl Node { | |||
|
408 | 414 | &self, |
|
409 | 415 | on_disk: &'on_disk [u8], |
|
410 | 416 | ) -> Result<&'on_disk [Node], DirstateV2ParseError> { |
|
411 |
read_ |
|
|
417 | read_nodes(on_disk, self.children) | |
|
412 | 418 | } |
|
413 | 419 | |
|
414 | 420 | pub(super) fn to_in_memory_node<'on_disk>( |
@@ -483,23 +489,31 impl From<&'_ Timestamp> for SystemTime | |||
|
483 | 489 | |
|
484 | 490 | fn read_hg_path( |
|
485 | 491 | on_disk: &[u8], |
|
486 | slice: Slice, | |
|
492 | slice: PathSlice, | |
|
487 | 493 | ) -> Result<&HgPath, DirstateV2ParseError> { |
|
488 | let bytes = read_slice::<u8>(on_disk, slice)?; | |
|
489 | Ok(HgPath::new(bytes)) | |
|
494 | read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new) | |
|
490 | 495 | } |
|
491 | 496 | |
|
492 |
fn read_s |
|
|
497 | fn read_nodes( | |
|
493 | 498 | on_disk: &[u8], |
|
494 |
slice: |
|
|
499 | slice: ChildNodes, | |
|
500 | ) -> Result<&[Node], DirstateV2ParseError> { | |
|
501 | read_slice(on_disk, slice.start, slice.len.get()) | |
|
502 | } | |
|
503 | ||
|
504 | fn read_slice<T, Len>( | |
|
505 | on_disk: &[u8], | |
|
506 | start: Offset, | |
|
507 | len: Len, | |
|
495 | 508 | ) -> Result<&[T], DirstateV2ParseError> |
|
496 | 509 | where |
|
497 | 510 | T: BytesCast, |
|
511 | Len: TryInto<usize>, | |
|
498 | 512 | { |
|
499 | 513 | // Either `usize::MAX` would result in "out of bounds" error since a single |
|
500 | 514 | // `&[u8]` cannot occupy the entire addess space. |
|
501 |
let start = |
|
|
502 |
let len = |
|
|
515 | let start = start.get().try_into().unwrap_or(std::usize::MAX); | |
|
516 | let len = len.try_into().unwrap_or(std::usize::MAX); | |
|
503 | 517 | on_disk |
|
504 | 518 | .get(start..) |
|
505 | 519 | .and_then(|bytes| T::slice_from_bytes(bytes, len).ok()) |
@@ -514,10 +528,10 pub(crate) fn for_each_tracked_path<'on_ | |||
|
514 | 528 | let root = read_root(on_disk)?; |
|
515 | 529 | fn recur<'on_disk>( |
|
516 | 530 | on_disk: &'on_disk [u8], |
|
517 |
nodes: |
|
|
531 | nodes: ChildNodes, | |
|
518 | 532 | f: &mut impl FnMut(&'on_disk HgPath), |
|
519 | 533 | ) -> Result<(), DirstateV2ParseError> { |
|
520 |
for node in read_ |
|
|
534 | for node in read_nodes(on_disk, nodes)? { | |
|
521 | 535 | if let Some(state) = node.state()? { |
|
522 | 536 | if state.is_tracked() { |
|
523 | 537 | f(node.full_path(on_disk)?) |
@@ -565,9 +579,10 fn write_nodes( | |||
|
565 | 579 | // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration |
|
566 | 580 | // order. Sort to enable binary search in the written file. |
|
567 | 581 | let nodes = nodes.sorted(); |
|
582 | let nodes_len = nodes.len(); | |
|
568 | 583 | |
|
569 | 584 | // First accumulate serialized nodes in a `Vec` |
|
570 |
let mut on_disk_nodes = Vec::with_capacity(nodes |
|
|
585 | let mut on_disk_nodes = Vec::with_capacity(nodes_len); | |
|
571 | 586 | for node in nodes { |
|
572 | 587 | let children = write_nodes( |
|
573 | 588 | dirstate_map, |
@@ -575,12 +590,12 fn write_nodes( | |||
|
575 | 590 | out, |
|
576 | 591 | )?; |
|
577 | 592 | let full_path = node.full_path(dirstate_map.on_disk)?; |
|
578 |
let full_path = write_ |
|
|
593 | let full_path = write_path(full_path.as_bytes(), out); | |
|
579 | 594 | let copy_source = |
|
580 | 595 | if let Some(source) = node.copy_source(dirstate_map.on_disk)? { |
|
581 |
write_ |
|
|
596 | write_path(source.as_bytes(), out) | |
|
582 | 597 | } else { |
|
583 | Slice { | |
|
598 | PathSlice { | |
|
584 | 599 | start: 0.into(), |
|
585 | 600 | len: 0.into(), |
|
586 | 601 | } |
@@ -612,9 +627,9 fn write_nodes( | |||
|
612 | 627 | children, |
|
613 | 628 | copy_source, |
|
614 | 629 | full_path, |
|
615 |
base_name_start: u |
|
|
616 |
// Could only panic for paths over |
|
|
617 |
.expect("dirstate-v2 |
|
|
630 | base_name_start: u16::try_from(path.base_name_start()) | |
|
631 | // Could only panic for paths over 64 KiB | |
|
632 | .expect("dirstate-v2 path length overflow") | |
|
618 | 633 | .into(), |
|
619 | 634 | descendants_with_entry_count: node |
|
620 | 635 | .descendants_with_entry_count |
@@ -634,23 +649,30 fn write_nodes( | |||
|
634 | 649 | }, |
|
635 | 650 | }) |
|
636 | 651 | } |
|
637 | // … so we can write them contiguously | |
|
638 | Ok(write_slice::<Node>(&on_disk_nodes, out)) | |
|
652 | // … so we can write them contiguously, after writing everything else they | |
|
653 | // refer to. | |
|
654 | let start = current_offset(out); | |
|
655 | let len = u32::try_from(nodes_len) | |
|
656 | // Could only panic with over 4 billion nodes | |
|
657 | .expect("dirstate-v2 path length overflow") | |
|
658 | .into(); | |
|
659 | out.extend(on_disk_nodes.as_bytes()); | |
|
660 | Ok(ChildNodes { start, len }) | |
|
639 | 661 | } |
|
640 | 662 | |
|
641 | fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice | |
|
642 | where | |
|
643 | T: BytesCast, | |
|
644 | { | |
|
645 | let start = u32::try_from(out.len()) | |
|
663 | fn current_offset(out: &Vec<u8>) -> Offset { | |
|
664 | u32::try_from(out.len()) | |
|
646 | 665 | // Could only panic for a dirstate file larger than 4 GiB |
|
647 | 666 | .expect("dirstate-v2 offset overflow") |
|
648 |
.into() |
|
|
649 | let len = u32::try_from(slice.len()) | |
|
650 | // Could only panic for paths over 4 GiB or nodes with over 4 billions | |
|
651 | // child nodes | |
|
652 | .expect("dirstate-v2 offset overflow") | |
|
667 | .into() | |
|
668 | } | |
|
669 | ||
|
670 | fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice { | |
|
671 | let start = current_offset(out); | |
|
672 | let len = u16::try_from(slice.len()) | |
|
673 | // Could only panic for paths over 64 KiB | |
|
674 | .expect("dirstate-v2 path length overflow") | |
|
653 | 675 | .into(); |
|
654 | 676 | out.extend(slice.as_bytes()); |
|
655 | Slice { start, len } | |
|
677 | PathSlice { start, len } | |
|
656 | 678 | } |
General Comments 0
You need to be logged in to leave comments.
Login now