on_disk.rs
760 lines
| 25.3 KiB
| application/rls-services+xml
|
RustLexer
Simon Sapin
|
r48058 | //! The "version 2" disk representation of the dirstate | ||
//! | ||||
//! # File format | ||||
//! | ||||
Simon Sapin
|
r48476 | //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket that starts | ||
//! with a fixed-sized header whose layout is defined by the `DocketHeader` | ||||
//! struct, followed by the data file identifier. | ||||
//! | ||||
//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That | ||||
//! file may be longer than the size given in the docket, but not shorter. Only | ||||
//! the start of the data file up to the given size is considered. The | ||||
//! fixed-size "root" of the dirstate tree whose layout is defined by the | ||||
//! `Root` struct is found at the end of that slice of data. | ||||
//! | ||||
//! Its `root_nodes` field contains the slice (offset and length) to | ||||
Simon Sapin
|
r48058 | //! the nodes representing the files and directories at the root of the | ||
//! repository. Each node is also fixed-size, defined by the `Node` struct. | ||||
//! Nodes in turn contain slices to variable-size paths, and to their own child | ||||
//! nodes (if any) for nested files and directories. | ||||
Simon Sapin
|
r48124 | use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef}; | ||
Simon Sapin
|
r48058 | use crate::dirstate_tree::path_with_basename::WithBasename; | ||
use crate::errors::HgError; | ||||
use crate::utils::hg_path::HgPath; | ||||
use crate::DirstateEntry; | ||||
use crate::DirstateError; | ||||
use crate::DirstateParents; | ||||
Simon Sapin
|
r48128 | use crate::EntryState; | ||
Simon Sapin
|
r48477 | use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be}; | ||
Simon Sapin
|
r48058 | use bytes_cast::BytesCast; | ||
Simon Sapin
|
r48474 | use format_bytes::format_bytes; | ||
Simon Sapin
|
r48058 | use std::borrow::Cow; | ||
Simon Sapin
|
r48475 | use std::convert::{TryFrom, TryInto}; | ||
Simon Sapin
|
r48137 | use std::time::{Duration, SystemTime, UNIX_EPOCH}; | ||
Simon Sapin
|
r48058 | |||
Simon Sapin
|
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
///
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
Simon Sapin
|
r48058 | |||
Simon Sapin
|
/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;
/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

/// Length in bytes of the hash of "expanded" ignore patterns
/// (see `TreeMetadata::ignore_patterns_hash`, which defines how it is
/// computed with SHA-1).
pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
Simon Sapin
|
/// Serialized size of `TreeMetadata`, in bytes.
///
/// Must match the constant of the same name in
/// `mercurial/dirstateutils/docket.py`
const TREE_METADATA_SIZE: usize = 44;
Simon Sapin
|
/// Make sure that size-affecting changes are made knowingly
///
/// This function is never called: naming a `transmute` between two types
/// only compiles when both types have the same size, so each line below is
/// effectively a compile-time assertion on a struct's serialized size.
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; 43]>;
}
Simon Sapin
|
// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    /// Expected to equal `V2_FORMAT_MARKER` (checked in `read_docket`)
    marker: [u8; V2_FORMAT_MARKER.len()],
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],

    /// Counted in bytes
    data_size: Size,

    metadata: TreeMetadata,

    /// Length of the data file identifier that follows this header
    uuid_size: u8,
}

/// A parsed view of the `.hg/dirstate` docket: the fixed-size header plus
/// the variable-size data file identifier that follows it.
pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    uuid: &'on_disk [u8],
}
Simon Sapin
|
/// Fixed-size metadata about the dirstate tree, stored in the docket.
///
/// Its serialized size is asserted to be `TREE_METADATA_SIZE` in
/// `static_assert_size_of`.
#[derive(BytesCast)]
#[repr(C)]
struct TreeMetadata {
    /// Slice (offset and length, within the data file) of the top-level
    /// nodes of the dirstate tree
    root_nodes: ChildNodes,

    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,

    /// How many bytes of this data file are not used anymore
    unreachable_bytes: Size,

    /// Current version always sets these bytes to zero when creating or
    /// updating a dirstate. Future versions could assign some bits to signal
    /// for example "the version that last wrote/updated this dirstate did so
    /// in such and such way that can be relied on by versions that know to."
    unused: [u8; 4],

    /// If non-zero, a hash of ignore files that were used for some previous
    /// run of the `status` algorithm.
    ///
    /// We define:
    ///
    /// * "Root" ignore files are `.hgignore` at the root of the repository if
    ///   it exists, and files from `ui.ignore.*` config. This set of files is
    ///   then sorted by the string representation of their path.
    /// * The "expanded contents" of an ignore files is the byte string made
    ///   by concatenating its contents with the "expanded contents" of other
    ///   files included with `include:` or `subinclude:` files, in inclusion
    ///   order. This definition is recursive, as included files can
    ///   themselves include more files.
    ///
    /// This hash is defined as the SHA-1 of the concatenation (in sorted
    /// order) of the "expanded contents" of each "root" ignore file.
    /// (Note that computing this does not require actually concatenating byte
    /// strings into contiguous memory, instead SHA-1 hashing can be done
    /// incrementally.)
    ignore_patterns_hash: IgnorePatternsHash,
}
/// On-disk representation of one node of the dirstate tree.
///
/// Fixed-size (asserted to be 43 bytes in `static_assert_size_of`); paths
/// and child nodes are referenced through slices into the data file.
#[derive(BytesCast)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,

    /// Depending on the value of `state`:
    ///
    /// * A null byte: `data` is not used.
    ///
    /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
    ///   represent a dirstate entry like in the v1 format.
    ///
    /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
    ///   as the `Timestamp` for the mtime of a cached directory.
    ///
    ///   The presence of this state means that at some point, this path in
    ///   the working directory was observed:
    ///
    ///   - To be a directory
    ///   - With the modification time as given by `Timestamp`
    ///   - That timestamp was already strictly in the past when observed,
    ///     meaning that later changes cannot happen in the same clock tick
    ///     and must cause a different modification time (unless the system
    ///     clock jumps back and we get unlucky, which is not impossible but
    ///     deemed unlikely enough).
    ///   - All direct children of this directory (as returned by
    ///     `std::fs::read_dir`) either have a corresponding dirstate node, or
    ///     are ignored by ignore patterns whose hash is in
    ///     `TreeMetadata::ignore_patterns_hash`.
    ///
    ///   This means that if `std::fs::symlink_metadata` later reports the
    ///   same modification time and ignored patterns haven’t changed, a run
    ///   of status that is not listing ignored files can skip calling
    ///   `std::fs::read_dir` again for this directory, and iterate child
    ///   dirstate nodes instead.
    state: u8,
    data: Entry,
}
Simon Sapin
|
/// The `data` payload of a `Node`: either a v1-style dirstate entry
/// (`mode`/`mtime`/`size`, big-endian), or — when `Node::state` is `d` —
/// reinterpreted as a `Timestamp` (see `Entry::as_timestamp`).
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct Entry {
    mode: I32Be,
    mtime: I32Be,
    size: I32Be,
}
Simon Sapin
|
/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone, PartialEq)]
#[repr(C)]
pub(super) struct Timestamp {
    seconds: I64Be,

    /// In `0 .. 1_000_000_000`.
    ///
    /// This timestamp is later or earlier than `(seconds, 0)` by this many
    /// nanoseconds, if `seconds` is non-negative or negative, respectively.
    nanoseconds: U32Be,
}
Simon Sapin
|
/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;
Simon Sapin
|
/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
    start: Offset,
    len: Size,
}
Simon Sapin
|
/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;
Simon Sapin
|
r48058 | |||
Simon Sapin
|
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;

// These `From` impls let `?` propagate parse errors as the crate-wide error
// types.
impl From<DirstateV2ParseError> for HgError {
    fn from(_: DirstateV2ParseError) -> Self {
        HgError::corrupted("dirstate-v2 parse error")
    }
}

impl From<DirstateV2ParseError> for crate::DirstateError {
    fn from(error: DirstateV2ParseError) -> Self {
        HgError::from(error).into()
    }
}
Simon Sapin
|
impl<'on_disk> Docket<'on_disk> {
    /// Returns the working directory parents stored in the docket header.
    ///
    /// Only the first `USED_NODE_ID_BYTES` (20, for SHA-1) of each stored
    /// 32-byte field are meaningful.
    pub fn parents(&self) -> DirstateParents {
        use crate::Node;
        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        DirstateParents { p1, p2 }
    }

    /// The serialized bytes of the `TreeMetadata` embedded in the header.
    pub fn tree_metadata(&self) -> &[u8] {
        self.header.metadata.as_bytes()
    }

    /// How many bytes of the data file are used, per the docket.
    pub fn data_size(&self) -> usize {
        // This `unwrap` could only panic on a 16-bit CPU
        self.header.data_size.get().try_into().unwrap()
    }

    /// Name of the data file for this docket: `dirstate.{uuid}.d`.
    pub fn data_filename(&self) -> String {
        String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
    }
}
/// Parses the contents of `.hg/dirstate` as a dirstate-v2 docket.
///
/// Fails with `DirstateV2ParseError` if the bytes are too short for a
/// `DocketHeader`, the format marker does not match, or the trailing bytes
/// do not have exactly the UUID length announced in the header.
pub fn read_docket(
    on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
    let (header, uuid) =
        DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
    let uuid_size = header.uuid_size as usize;
    if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
        Ok(Docket { header, uuid })
    } else {
        Err(DirstateV2ParseError)
    }
}
Simon Sapin
|
/// Builds a `DirstateMap` backed by `on_disk` (the contents of the data
/// file), using `metadata` (the serialized `TreeMetadata` taken from the
/// docket) to locate the root nodes and counters.
///
/// An empty `on_disk` yields an empty map.
pub(super) fn read<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
    if on_disk.is_empty() {
        return Ok(DirstateMap::empty(on_disk));
    }
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    let dirstate_map = DirstateMap {
        on_disk,
        // The tree is kept in its on-disk form and only parsed lazily
        root: dirstate_map::ChildNodes::OnDisk(read_nodes(
            on_disk,
            meta.root_nodes,
        )?),
        nodes_with_entry_count: meta.nodes_with_entry_count.get(),
        nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
        ignore_patterns_hash: meta.ignore_patterns_hash,
        unreachable_bytes: meta.unreachable_bytes.get(),
    };
    Ok(dirstate_map)
}
impl Node {
    /// Resolves this node's full path slice into `on_disk`.
    pub(super) fn full_path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        read_hg_path(on_disk, self.full_path)
    }

    /// Offset of the base name within `full_path`, validated to be strictly
    /// inside the path.
    pub(super) fn base_name_start<'on_disk>(
        &self,
    ) -> Result<usize, DirstateV2ParseError> {
        let start = self.base_name_start.get();
        if start < self.full_path.len.get() {
            let start = usize::try_from(start)
                // u16 -> usize, could only panic on a platform where
                // `usize` is smaller than 16 bits (none supported by Rust)
                .expect("dirstate-v2 base_name_start out of bounds");
            Ok(start)
        } else {
            Err(DirstateV2ParseError)
        }
    }

    /// The last component of `full_path` (file or directory name).
    pub(super) fn base_name<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        let full_path = self.full_path(on_disk)?;
        let base_name_start = self.base_name_start()?;
        Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
    }

    /// The full path paired with its base-name offset, as used for map keys.
    pub(super) fn path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
        Ok(WithBasename::from_raw_parts(
            Cow::Borrowed(self.full_path(on_disk)?),
            self.base_name_start()?,
        ))
    }

    /// A zero `start` offset encodes "no copy source" (see `OptPathSlice`).
    pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
        self.copy_source.start.get() != 0
    }

    /// The copy source path, if any.
    pub(super) fn copy_source<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
        Ok(if self.has_copy_source() {
            Some(read_hg_path(on_disk, self.copy_source)?)
        } else {
            None
        })
    }

    /// Decodes `state`/`data` into the in-memory representation.
    /// See the documentation of the `state` field for the byte meanings.
    pub(super) fn node_data(
        &self,
    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
        let entry = |state| {
            dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
        };
        match self.state {
            b'\0' => Ok(dirstate_map::NodeData::None),
            b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
                mtime: *self.data.as_timestamp(),
            }),
            b'n' => Ok(entry(EntryState::Normal)),
            b'a' => Ok(entry(EntryState::Added)),
            b'r' => Ok(entry(EntryState::Removed)),
            b'm' => Ok(entry(EntryState::Merged)),
            _ => Err(DirstateV2ParseError),
        }
    }

    /// The cached directory mtime, only present when `state` is `d`.
    pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
        if self.state == b'd' {
            Some(self.data.as_timestamp())
        } else {
            None
        }
    }

    /// Decodes `state` alone: `None` for "no entry" (null byte or cached
    /// directory), an error for any unknown byte.
    pub(super) fn state(
        &self,
    ) -> Result<Option<EntryState>, DirstateV2ParseError> {
        match self.state {
            b'\0' | b'd' => Ok(None),
            b'n' => Ok(Some(EntryState::Normal)),
            b'a' => Ok(Some(EntryState::Added)),
            b'r' => Ok(Some(EntryState::Removed)),
            b'm' => Ok(Some(EntryState::Merged)),
            _ => Err(DirstateV2ParseError),
        }
    }

    /// Builds a v1-style entry from `data`, with the given already-decoded
    /// state.
    fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
        DirstateEntry {
            state,
            mode: self.data.mode.get(),
            mtime: self.data.mtime.get(),
            size: self.data.size.get(),
        }
    }

    /// The dirstate entry for this node, if it has one.
    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        Ok(self
            .state()?
            .map(|state| self.entry_with_given_state(state)))
    }

    /// Resolves this node's `children` slice into `on_disk`.
    pub(super) fn children<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
        read_nodes(on_disk, self.children)
    }

    /// Converts to the in-memory node representation, borrowing paths and
    /// child nodes from `on_disk` where possible.
    pub(super) fn to_in_memory_node<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
        Ok(dirstate_map::Node {
            children: dirstate_map::ChildNodes::OnDisk(
                self.children(on_disk)?,
            ),
            copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
            data: self.node_data()?,
            descendants_with_entry_count: self
                .descendants_with_entry_count
                .get(),
            tracked_descendants_count: self.tracked_descendants_count.get(),
        })
    }
}
Simon Sapin
|
impl Entry {
    /// Reinterprets a `Timestamp` as the `data` payload of a cached
    /// directory (`Node::state == b'd'`).
    fn from_timestamp(timestamp: Timestamp) -> Self {
        // Safety: both types implement the `BytesCast` trait, so we could
        // safely use `as_bytes` and `from_bytes` to do this conversion. Using
        // `transmute` instead makes the compiler check that the two types
        // have the same size, which eliminates the error case of
        // `from_bytes`.
        unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
    }

    /// Inverse of `from_timestamp`: view these bytes as a `Timestamp`.
    fn as_timestamp(&self) -> &Timestamp {
        // Safety: same as above in `from_timestamp`
        unsafe { &*(self as *const Entry as *const Timestamp) }
    }
}
Simon Sapin
|
impl Timestamp {
    /// Whole seconds since (or before, if negative) the Unix epoch.
    pub fn seconds(&self) -> i64 {
        self.seconds.get()
    }
}
Simon Sapin
|
r48138 | impl From<SystemTime> for Timestamp { | ||
fn from(system_time: SystemTime) -> Self { | ||||
Simon Sapin
|
r48137 | let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) { | ||
Ok(duration) => { | ||||
(duration.as_secs() as i64, duration.subsec_nanos()) | ||||
} | ||||
Err(error) => { | ||||
let negative = error.duration(); | ||||
(-(negative.as_secs() as i64), negative.subsec_nanos()) | ||||
} | ||||
}; | ||||
Timestamp { | ||||
seconds: secs.into(), | ||||
nanoseconds: nanos.into(), | ||||
} | ||||
} | ||||
} | ||||
impl From<&'_ Timestamp> for SystemTime { | ||||
fn from(timestamp: &'_ Timestamp) -> Self { | ||||
let secs = timestamp.seconds.get(); | ||||
let nanos = timestamp.nanoseconds.get(); | ||||
if secs >= 0 { | ||||
UNIX_EPOCH + Duration::new(secs as u64, nanos) | ||||
} else { | ||||
UNIX_EPOCH - Duration::new((-secs) as u64, nanos) | ||||
} | ||||
} | ||||
} | ||||
Simon Sapin
|
r48125 | fn read_hg_path( | ||
on_disk: &[u8], | ||||
Simon Sapin
|
r48477 | slice: PathSlice, | ||
Simon Sapin
|
r48128 | ) -> Result<&HgPath, DirstateV2ParseError> { | ||
Simon Sapin
|
r48477 | read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new) | ||
Simon Sapin
|
r48058 | } | ||
Simon Sapin
|
/// Resolves a `ChildNodes` slice (offset + count) into `on_disk` as a slice
/// of `Node` structs.
fn read_nodes(
    on_disk: &[u8],
    slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get())
}
fn read_slice<T, Len>( | ||||
on_disk: &[u8], | ||||
start: Offset, | ||||
len: Len, | ||||
Simon Sapin
|
r48125 | ) -> Result<&[T], DirstateV2ParseError> | ||
Simon Sapin
|
r48058 | where | ||
T: BytesCast, | ||||
Simon Sapin
|
r48477 | Len: TryInto<usize>, | ||
Simon Sapin
|
r48058 | { | ||
// Either `usize::MAX` would result in "out of bounds" error since a single | ||||
// `&[u8]` cannot occupy the entire addess space. | ||||
Simon Sapin
|
r48477 | let start = start.get().try_into().unwrap_or(std::usize::MAX); | ||
let len = len.try_into().unwrap_or(std::usize::MAX); | ||||
Simon Sapin
|
r48058 | on_disk | ||
.get(start..) | ||||
.and_then(|bytes| T::slice_from_bytes(bytes, len).ok()) | ||||
.map(|(slice, _rest)| slice) | ||||
Simon Sapin
|
r48125 | .ok_or_else(|| DirstateV2ParseError) | ||
Simon Sapin
|
r48058 | } | ||
Simon Sapin
|
/// Calls `f` with the full path of every tracked file, in depth-first
/// pre-order over the on-disk tree.
///
/// `metadata` is the serialized `TreeMetadata` from the docket; `on_disk`
/// is the data file contents.
pub(crate) fn for_each_tracked_path<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
    mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    // Recursive walk of one level of child nodes
    fn recur<'on_disk>(
        on_disk: &'on_disk [u8],
        nodes: ChildNodes,
        f: &mut impl FnMut(&'on_disk HgPath),
    ) -> Result<(), DirstateV2ParseError> {
        for node in read_nodes(on_disk, nodes)? {
            if let Some(state) = node.state()? {
                if state.is_tracked() {
                    f(node.full_path(on_disk)?)
                }
            }
            recur(on_disk, node.children, f)?
        }
        Ok(())
    }
    recur(on_disk, meta.root_nodes, &mut f)
}
Simon Sapin
|
/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false).
pub(super) fn write(
    dirstate_map: &mut DirstateMap,
    can_append: bool,
) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
    let append = can_append && dirstate_map.write_should_append();

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Node>()
        * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    // Serializes the whole tree; returns the slice describing the root nodes
    let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

    let meta = TreeMetadata {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        unreachable_bytes: dirstate_map.unreachable_bytes.into(),
        unused: [0; 4],
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    Ok((writer.out, meta.as_bytes().to_vec(), append))
}
/// Serialization state for `write`: the map being written, whether offsets
/// are relative to appending after the existing data file, and the output
/// buffer accumulating the new bytes.
struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    append: bool,
    out: Vec<u8>,
}
Simon Sapin
|
impl Writer<'_, '_> {
    /// Serializes `nodes` (recursively, children first) into `self.out` and
    /// returns the slice locating them in the resulting data file.
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            // Recurse first: children (and paths) must be written before the
            // fixed-size node records that point to them.
            let children =
                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                // `start == 0` encodes "no copy source" (see `OptPathSlice`)
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    // Encode `NodeData` into the `state` byte plus `data`
                    // payload (see the `Node::state` documentation)
                    let (state, data) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => (
                            entry.state.into(),
                            Entry {
                                mode: entry.mode.into(),
                                mtime: entry.mtime.into(),
                                size: entry.size.into(),
                            },
                        ),
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            (b'd', Entry::from_timestamp(*mtime))
                        }
                        dirstate_map::NodeData::None => (
                            b'\0',
                            Entry {
                                mode: 0.into(),
                                mtime: 0.into(),
                                size: 0.into(),
                            },
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        state,
                        data,
                    }
                }
                // An unmodified on-disk node only needs its slices rewritten
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        // Inclusive range of the addresses covered by a byte slice
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

    /// Offset that the next written byte will have in the final data file
    /// (accounting for the existing file length when appending).
    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    /// Writes a path's bytes (unless they can be reused from the existing
    /// data file) and returns the slice locating them.
    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}
Simon Sapin
|
/// Converts a byte offset to the on-disk `Offset` type.
fn offset_from_usize(x: usize) -> Offset {
    u32::try_from(x)
        // Could only panic for a dirstate file larger than 4 GiB
        .expect("dirstate-v2 offset overflow")
        .into()
}

/// Converts a node count to the on-disk `Size` type.
fn child_nodes_len_from_usize(x: usize) -> Size {
    u32::try_from(x)
        // Could only panic with over 4 billion nodes
        .expect("dirstate-v2 slice length overflow")
        .into()
}

/// Converts a path length to the on-disk `PathSize` type.
fn path_len_from_usize(x: usize) -> PathSize {
    u16::try_from(x)
        // Could only panic for paths over 64 KiB
        .expect("dirstate-v2 path length overflow")
        .into()
}