Show More
@@ -26,7 +26,7 use crate::DirstateEntry; | |||||
26 | use crate::DirstateError; |
|
26 | use crate::DirstateError; | |
27 | use crate::DirstateParents; |
|
27 | use crate::DirstateParents; | |
28 | use crate::EntryState; |
|
28 | use crate::EntryState; | |
29 | use bytes_cast::unaligned::{I32Be, I64Be, U32Be}; |
|
29 | use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be}; | |
30 | use bytes_cast::BytesCast; |
|
30 | use bytes_cast::BytesCast; | |
31 | use format_bytes::format_bytes; |
|
31 | use format_bytes::format_bytes; | |
32 | use std::borrow::Cow; |
|
32 | use std::borrow::Cow; | |
@@ -54,7 +54,10 struct DocketHeader { | |||||
54 | marker: [u8; V2_FORMAT_MARKER.len()], |
|
54 | marker: [u8; V2_FORMAT_MARKER.len()], | |
55 | parent_1: [u8; STORED_NODE_ID_BYTES], |
|
55 | parent_1: [u8; STORED_NODE_ID_BYTES], | |
56 | parent_2: [u8; STORED_NODE_ID_BYTES], |
|
56 | parent_2: [u8; STORED_NODE_ID_BYTES], | |
|
57 | ||||
|
58 | /// Counted in bytes | |||
57 | data_size: Size, |
|
59 | data_size: Size, | |
|
60 | ||||
58 | uuid_size: u8, |
|
61 | uuid_size: u8, | |
59 | } |
|
62 | } | |
60 |
|
63 | |||
@@ -98,7 +101,7 pub(super) struct Node { | |||||
98 | full_path: PathSlice, |
|
101 | full_path: PathSlice, | |
99 |
|
102 | |||
100 | /// In bytes from `self.full_path.start` |
|
103 | /// In bytes from `self.full_path.start` | |
101 | base_name_start: Size, |
|
104 | base_name_start: PathSize, | |
102 |
|
105 | |||
103 | copy_source: OptPathSlice, |
|
106 | copy_source: OptPathSlice, | |
104 | children: ChildNodes, |
|
107 | children: ChildNodes, | |
@@ -167,20 +170,13 type Offset = U32Be; | |||||
167 |
|
170 | |||
168 | /// Counted in number of items |
|
171 | /// Counted in number of items | |
169 | /// |
|
172 | /// | |
170 |
/// NOTE: not support |
|
173 | /// NOTE: we choose not to support counting more than 4 billion nodes anywhere. | |
171 | /// or filenames more than 4 GiB. |
|
|||
172 | type Size = U32Be; |
|
174 | type Size = U32Be; | |
173 |
|
175 | |||
174 | /// Location of consecutive, fixed-size items. |
|
176 | /// Counted in bytes | |
175 | /// |
|
177 | /// | |
176 | /// An item can be a single byte for paths, or a struct with |
|
178 | /// NOTE: we choose not to support file names/paths longer than 64 KiB. | |
177 | /// `derive(BytesCast)`. |
|
179 | type PathSize = U16Be; | |
178 | #[derive(BytesCast, Copy, Clone)] |
|
|||
179 | #[repr(C)] |
|
|||
180 | struct Slice { |
|
|||
181 | start: Offset, |
|
|||
182 | len: Size, |
|
|||
183 | } |
|
|||
184 |
|
180 | |||
185 | /// A contiguous sequence of `len` times `Node`, representing the child nodes |
|
181 | /// A contiguous sequence of `len` times `Node`, representing the child nodes | |
186 | /// of either some other node or of the repository root. |
|
182 | /// of either some other node or of the repository root. | |
@@ -188,19 +184,29 struct Slice { | |||||
188 | /// Always sorted by ascending `full_path`, to allow binary search. |
|
184 | /// Always sorted by ascending `full_path`, to allow binary search. | |
189 | /// Since nodes with the same parent nodes also have the same parent path, |
|
185 | /// Since nodes with the same parent nodes also have the same parent path, | |
190 | /// only the `base_name`s need to be compared during binary search. |
|
186 | /// only the `base_name`s need to be compared during binary search. | |
191 | type ChildNodes = Slice; |
|
187 | #[derive(BytesCast, Copy, Clone)] | |
|
188 | #[repr(C)] | |||
|
189 | struct ChildNodes { | |||
|
190 | start: Offset, | |||
|
191 | len: Size, | |||
|
192 | } | |||
192 |
|
193 | |||
193 | /// A `HgPath` of `len` bytes |
|
194 | /// A `HgPath` of `len` bytes | |
194 | type PathSlice = Slice; |
|
195 | #[derive(BytesCast, Copy, Clone)] | |
|
196 | #[repr(C)] | |||
|
197 | struct PathSlice { | |||
|
198 | start: Offset, | |||
|
199 | len: PathSize, | |||
|
200 | } | |||
195 |
|
201 | |||
196 | /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes |
|
202 | /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes | |
197 | type OptPathSlice = Slice; |
|
203 | type OptPathSlice = PathSlice; | |
198 |
|
204 | |||
199 | /// Make sure that size-affecting changes are made knowingly |
|
205 | /// Make sure that size-affecting changes are made knowingly | |
200 | fn _static_assert_size_of() { |
|
206 | fn _static_assert_size_of() { | |
201 | let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; |
|
207 | let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; | |
202 | let _ = std::mem::transmute::<Root, [u8; 36]>; |
|
208 | let _ = std::mem::transmute::<Root, [u8; 36]>; | |
203 |
let _ = std::mem::transmute::<Node, [u8; 4 |
|
209 | let _ = std::mem::transmute::<Node, [u8; 43]>; | |
204 | } |
|
210 | } | |
205 |
|
211 | |||
206 | /// Unexpected file format found in `.hg/dirstate` with the "v2" format. |
|
212 | /// Unexpected file format found in `.hg/dirstate` with the "v2" format. | |
@@ -279,7 +285,7 pub(super) fn read<'on_disk>( | |||||
279 | let root = read_root(on_disk)?; |
|
285 | let root = read_root(on_disk)?; | |
280 | let dirstate_map = DirstateMap { |
|
286 | let dirstate_map = DirstateMap { | |
281 | on_disk, |
|
287 | on_disk, | |
282 |
root: dirstate_map::ChildNodes::OnDisk(read_ |
|
288 | root: dirstate_map::ChildNodes::OnDisk(read_nodes( | |
283 | on_disk, |
|
289 | on_disk, | |
284 | root.root_nodes, |
|
290 | root.root_nodes, | |
285 | )?), |
|
291 | )?), | |
@@ -408,7 +414,7 impl Node { | |||||
408 | &self, |
|
414 | &self, | |
409 | on_disk: &'on_disk [u8], |
|
415 | on_disk: &'on_disk [u8], | |
410 | ) -> Result<&'on_disk [Node], DirstateV2ParseError> { |
|
416 | ) -> Result<&'on_disk [Node], DirstateV2ParseError> { | |
411 |
read_ |
|
417 | read_nodes(on_disk, self.children) | |
412 | } |
|
418 | } | |
413 |
|
419 | |||
414 | pub(super) fn to_in_memory_node<'on_disk>( |
|
420 | pub(super) fn to_in_memory_node<'on_disk>( | |
@@ -483,23 +489,31 impl From<&'_ Timestamp> for SystemTime | |||||
483 |
|
489 | |||
484 | fn read_hg_path( |
|
490 | fn read_hg_path( | |
485 | on_disk: &[u8], |
|
491 | on_disk: &[u8], | |
486 | slice: Slice, |
|
492 | slice: PathSlice, | |
487 | ) -> Result<&HgPath, DirstateV2ParseError> { |
|
493 | ) -> Result<&HgPath, DirstateV2ParseError> { | |
488 | let bytes = read_slice::<u8>(on_disk, slice)?; |
|
494 | read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new) | |
489 | Ok(HgPath::new(bytes)) |
|
|||
490 | } |
|
495 | } | |
491 |
|
496 | |||
492 |
fn read_s |
|
497 | fn read_nodes( | |
493 | on_disk: &[u8], |
|
498 | on_disk: &[u8], | |
494 |
slice: |
|
499 | slice: ChildNodes, | |
|
500 | ) -> Result<&[Node], DirstateV2ParseError> { | |||
|
501 | read_slice(on_disk, slice.start, slice.len.get()) | |||
|
502 | } | |||
|
503 | ||||
|
504 | fn read_slice<T, Len>( | |||
|
505 | on_disk: &[u8], | |||
|
506 | start: Offset, | |||
|
507 | len: Len, | |||
495 | ) -> Result<&[T], DirstateV2ParseError> |
|
508 | ) -> Result<&[T], DirstateV2ParseError> | |
496 | where |
|
509 | where | |
497 | T: BytesCast, |
|
510 | T: BytesCast, | |
|
511 | Len: TryInto<usize>, | |||
498 | { |
|
512 | { | |
499 | // Either `usize::MAX` would result in "out of bounds" error since a single |
|
513 | // Either `usize::MAX` would result in "out of bounds" error since a single | |
500 | // `&[u8]` cannot occupy the entire addess space. |
|
514 | // `&[u8]` cannot occupy the entire addess space. | |
501 |
let start = |
|
515 | let start = start.get().try_into().unwrap_or(std::usize::MAX); | |
502 |
let len = |
|
516 | let len = len.try_into().unwrap_or(std::usize::MAX); | |
503 | on_disk |
|
517 | on_disk | |
504 | .get(start..) |
|
518 | .get(start..) | |
505 | .and_then(|bytes| T::slice_from_bytes(bytes, len).ok()) |
|
519 | .and_then(|bytes| T::slice_from_bytes(bytes, len).ok()) | |
@@ -514,10 +528,10 pub(crate) fn for_each_tracked_path<'on_ | |||||
514 | let root = read_root(on_disk)?; |
|
528 | let root = read_root(on_disk)?; | |
515 | fn recur<'on_disk>( |
|
529 | fn recur<'on_disk>( | |
516 | on_disk: &'on_disk [u8], |
|
530 | on_disk: &'on_disk [u8], | |
517 |
nodes: |
|
531 | nodes: ChildNodes, | |
518 | f: &mut impl FnMut(&'on_disk HgPath), |
|
532 | f: &mut impl FnMut(&'on_disk HgPath), | |
519 | ) -> Result<(), DirstateV2ParseError> { |
|
533 | ) -> Result<(), DirstateV2ParseError> { | |
520 |
for node in read_ |
|
534 | for node in read_nodes(on_disk, nodes)? { | |
521 | if let Some(state) = node.state()? { |
|
535 | if let Some(state) = node.state()? { | |
522 | if state.is_tracked() { |
|
536 | if state.is_tracked() { | |
523 | f(node.full_path(on_disk)?) |
|
537 | f(node.full_path(on_disk)?) | |
@@ -565,9 +579,10 fn write_nodes( | |||||
565 | // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration |
|
579 | // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration | |
566 | // order. Sort to enable binary search in the written file. |
|
580 | // order. Sort to enable binary search in the written file. | |
567 | let nodes = nodes.sorted(); |
|
581 | let nodes = nodes.sorted(); | |
|
582 | let nodes_len = nodes.len(); | |||
568 |
|
583 | |||
569 | // First accumulate serialized nodes in a `Vec` |
|
584 | // First accumulate serialized nodes in a `Vec` | |
570 |
let mut on_disk_nodes = Vec::with_capacity(nodes |
|
585 | let mut on_disk_nodes = Vec::with_capacity(nodes_len); | |
571 | for node in nodes { |
|
586 | for node in nodes { | |
572 | let children = write_nodes( |
|
587 | let children = write_nodes( | |
573 | dirstate_map, |
|
588 | dirstate_map, | |
@@ -575,12 +590,12 fn write_nodes( | |||||
575 | out, |
|
590 | out, | |
576 | )?; |
|
591 | )?; | |
577 | let full_path = node.full_path(dirstate_map.on_disk)?; |
|
592 | let full_path = node.full_path(dirstate_map.on_disk)?; | |
578 |
let full_path = write_ |
|
593 | let full_path = write_path(full_path.as_bytes(), out); | |
579 | let copy_source = |
|
594 | let copy_source = | |
580 | if let Some(source) = node.copy_source(dirstate_map.on_disk)? { |
|
595 | if let Some(source) = node.copy_source(dirstate_map.on_disk)? { | |
581 |
write_ |
|
596 | write_path(source.as_bytes(), out) | |
582 | } else { |
|
597 | } else { | |
583 | Slice { |
|
598 | PathSlice { | |
584 | start: 0.into(), |
|
599 | start: 0.into(), | |
585 | len: 0.into(), |
|
600 | len: 0.into(), | |
586 | } |
|
601 | } | |
@@ -612,9 +627,9 fn write_nodes( | |||||
612 | children, |
|
627 | children, | |
613 | copy_source, |
|
628 | copy_source, | |
614 | full_path, |
|
629 | full_path, | |
615 |
base_name_start: u |
|
630 | base_name_start: u16::try_from(path.base_name_start()) | |
616 |
// Could only panic for paths over |
|
631 | // Could only panic for paths over 64 KiB | |
617 |
.expect("dirstate-v2 |
|
632 | .expect("dirstate-v2 path length overflow") | |
618 | .into(), |
|
633 | .into(), | |
619 | descendants_with_entry_count: node |
|
634 | descendants_with_entry_count: node | |
620 | .descendants_with_entry_count |
|
635 | .descendants_with_entry_count | |
@@ -634,23 +649,30 fn write_nodes( | |||||
634 | }, |
|
649 | }, | |
635 | }) |
|
650 | }) | |
636 | } |
|
651 | } | |
637 | // β¦ so we can write them contiguously |
|
652 | // β¦ so we can write them contiguously, after writing everything else they | |
638 | Ok(write_slice::<Node>(&on_disk_nodes, out)) |
|
653 | // refer to. | |
|
654 | let start = current_offset(out); | |||
|
655 | let len = u32::try_from(nodes_len) | |||
|
656 | // Could only panic with over 4 billion nodes | |||
|
657 | .expect("dirstate-v2 path length overflow") | |||
|
658 | .into(); | |||
|
659 | out.extend(on_disk_nodes.as_bytes()); | |||
|
660 | Ok(ChildNodes { start, len }) | |||
639 | } |
|
661 | } | |
640 |
|
662 | |||
641 | fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice |
|
663 | fn current_offset(out: &Vec<u8>) -> Offset { | |
642 | where |
|
664 | u32::try_from(out.len()) | |
643 | T: BytesCast, |
|
|||
644 | { |
|
|||
645 | let start = u32::try_from(out.len()) |
|
|||
646 | // Could only panic for a dirstate file larger than 4 GiB |
|
665 | // Could only panic for a dirstate file larger than 4 GiB | |
647 | .expect("dirstate-v2 offset overflow") |
|
666 | .expect("dirstate-v2 offset overflow") | |
648 |
.into() |
|
667 | .into() | |
649 | let len = u32::try_from(slice.len()) |
|
668 | } | |
650 | // Could only panic for paths over 4 GiB or nodes with over 4 billions |
|
669 | ||
651 | // child nodes |
|
670 | fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice { | |
652 | .expect("dirstate-v2 offset overflow") |
|
671 | let start = current_offset(out); | |
|
672 | let len = u16::try_from(slice.len()) | |||
|
673 | // Could only panic for paths over 64 KiB | |||
|
674 | .expect("dirstate-v2 path length overflow") | |||
653 | .into(); |
|
675 | .into(); | |
654 | out.extend(slice.as_bytes()); |
|
676 | out.extend(slice.as_bytes()); | |
655 | Slice { start, len } |
|
677 | PathSlice { start, len } | |
656 | } |
|
678 | } |
General Comments 0
You need to be logged in to leave comments.
Login now