##// END OF EJS Templates
dirstate-v2: Support appending to the same data file...
dirstate-v2: Support appending to the same data file For now we’re still writing the entire data every time, so appending is not useful yet. Later we’ll have new nodes pointing to some existing data for nodes and paths that haven’t changed. The decision whether to append is pseudo-random in order to make tests exercise both code paths. This will be replaced by a heuristic based on the amount of unused existing data. Differential Revision: https://phab.mercurial-scm.org/D11094

File last commit:

r48478:065e6162 default
r48478:065e6162 default
Show More
on_disk.rs
695 lines | 23.0 KiB | application/rls-services+xml | RustLexer
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 //! The "version 2" disk representation of the dirstate
//!
//! # File format
//!
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket that starts
//! with a fixed-sized header whose layout is defined by the `DocketHeader`
//! struct, followed by the data file identifier.
//!
//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
//! file may be longer than the size given in the docket, but not shorter. Only
//! the start of the data file up to the given size is considered. The
//! fixed-size "root" of the dirstate tree whose layout is defined by the
//! `Root` struct is found at the end of that slice of data.
//!
//! Its `root_nodes` field contains the slice (offset and length) to
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 //! the nodes representing the files and directories at the root of the
//! repository. Each node is also fixed-size, defined by the `Node` struct.
//! Nodes in turn contain slices to variable-size paths, and to their own child
//! nodes (if any) for nested files and directories.
Simon Sapin
dirstate-tree: Add `NodeRef` and `ChildNodesRef` enums...
r48124 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 use crate::dirstate_tree::path_with_basename::WithBasename;
use crate::errors::HgError;
use crate::utils::hg_path::HgPath;
use crate::DirstateEntry;
use crate::DirstateError;
use crate::DirstateParents;
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 use crate::EntryState;
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 use bytes_cast::BytesCast;
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 use format_bytes::format_bytes;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 use std::borrow::Cow;
Simon Sapin
dirstate-v2: Enforce data size read from the docket file...
r48475 use std::convert::{TryFrom, TryInto};
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 use std::time::{Duration, SystemTime, UNIX_EPOCH};
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Change the on-disk format when the requirement is enabled...
r48055 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 /// This a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
Simon Sapin
dirstate-v2: Change the on-disk format when the requirement is enabled...
r48055 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 /// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;
/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
marker: [u8; V2_FORMAT_MARKER.len()],
parent_1: [u8; STORED_NODE_ID_BYTES],
parent_2: [u8; STORED_NODE_ID_BYTES],
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477
/// Counted in bytes
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 data_size: Size,
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 uuid_size: u8,
}
pub struct Docket<'on_disk> {
header: &'on_disk DocketHeader,
uuid: &'on_disk [u8],
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 #[derive(BytesCast)]
#[repr(C)]
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 struct Root {
root_nodes: ChildNodes,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 nodes_with_entry_count: Size,
nodes_with_copy_source_count: Size,
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202
/// If non-zero, a hash of ignore files that were used for some previous
/// run of the `status` algorithm.
///
/// We define:
///
/// * "Root" ignore files are `.hgignore` at the root of the repository if
/// it exists, and files from `ui.ignore.*` config. This set of files is
/// then sorted by the string representation of their path.
/// * The "expanded contents" of an ignore files is the byte string made
/// by concatenating its contents with the "expanded contents" of other
/// files included with `include:` or `subinclude:` files, in inclusion
/// order. This definition is recursive, as included files can
/// themselves include more files.
///
/// This hash is defined as the SHA-1 of the concatenation (in sorted
/// order) of the "expanded contents" of each "root" ignore file.
/// (Note that computing this does not require actually concatenating byte
/// strings into contiguous memory, instead SHA-1 hashing can be done
/// incrementally.)
ignore_patterns_hash: IgnorePatternsHash,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
#[derive(BytesCast)]
#[repr(C)]
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 pub(super) struct Node {
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 full_path: PathSlice,
/// In bytes from `self.full_path.start`
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 base_name_start: PathSize,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
copy_source: OptPathSlice,
children: ChildNodes,
Simon Sapin
dirstate-tree: Keep a counter of descendant nodes that have an entry...
r48272 pub(super) descendants_with_entry_count: Size,
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 pub(super) tracked_descendants_count: Size,
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137
Simon Sapin
rust: Document the DirstateMapMethods trait...
r48285 /// Depending on the value of `state`:
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 ///
Simon Sapin
dirstate-v2: Skip readdir in status based on directory mtime...
r48138 /// * A null byte: `data` is not used.
///
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
Simon Sapin
dirstate-v2: Skip readdir in status based on directory mtime...
r48138 /// represent a dirstate entry like in the v1 format.
///
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
/// as the `Timestamp` for the mtime of a cached directory.
///
Simon Sapin
dirstate-v2: Skip readdir in status based on directory mtime...
r48138 /// The presence of this state means that at some point, this path in
/// the working directory was observed:
///
/// - To be a directory
/// - With the modification time as given by `Timestamp`
/// - That timestamp was already strictly in the past when observed,
/// meaning that later changes cannot happen in the same clock tick
/// and must cause a different modification time (unless the system
/// clock jumps back and we get unlucky, which is not impossible but
/// but deemed unlikely enough).
Simon Sapin
status: Extend read_dir caching to directories with ignored files...
r48269 /// - All direct children of this directory (as returned by
/// `std::fs::read_dir`) either have a corresponding dirstate node, or
/// are ignored by ignore patterns whose hash is in
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 /// `Root::ignore_patterns_hash`.
Simon Sapin
dirstate-v2: Skip readdir in status based on directory mtime...
r48138 ///
/// This means that if `std::fs::symlink_metadata` later reports the
Simon Sapin
status: Extend read_dir caching to directories with ignored files...
r48269 /// same modification time and ignored patterns haven’t changed, a run
/// of status that is not listing ignored files can skip calling
/// `std::fs::read_dir` again for this directory, iterate child
/// dirstate nodes instead.
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 state: u8,
data: Entry,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 #[derive(BytesCast, Copy, Clone)]
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 #[repr(C)]
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 struct Entry {
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 mode: I32Be,
mtime: I32Be,
size: I32Be,
}
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 /// Duration since the Unix epoch
Simon Sapin
dirstate-v2: Skip readdir in status based on directory mtime...
r48138 #[derive(BytesCast, Copy, Clone, PartialEq)]
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 #[repr(C)]
pub(super) struct Timestamp {
seconds: I64Be,
/// In `0 .. 1_000_000_000`.
///
/// This timestamp is later or earlier than `(seconds, 0)` by this many
/// nanoseconds, if `seconds` is non-negative or negative, respectively.
nanoseconds: U32Be,
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 /// Counted in bytes from the start of the file
///
Simon Sapin
dirstate-v2: Use 32-bit integers instead of 64-bit for offsets...
r48270 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
/// Counted in number of items
///
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 type Size = U32Be;
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 /// Counted in bytes
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 ///
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 #[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
start: Offset,
len: Size,
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
/// A `HgPath` of `len` bytes
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 #[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
start: Offset,
len: PathSize,
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 type OptPathSlice = PathSlice;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
/// Make sure that size-affecting changes are made knowingly
fn _static_assert_size_of() {
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 let _ = std::mem::transmute::<Root, [u8; 36]>;
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 let _ = std::mem::transmute::<Node, [u8; 43]>;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
Simon Sapin
dirstate-v2: Make more APIs fallible, returning Result...
r48126 ///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125
impl From<DirstateV2ParseError> for HgError {
fn from(_: DirstateV2ParseError) -> Self {
HgError::corrupted("dirstate-v2 parse error")
}
}
impl From<DirstateV2ParseError> for crate::DirstateError {
fn from(error: DirstateV2ParseError) -> Self {
HgError::from(error).into()
}
}
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 impl<'on_disk> Docket<'on_disk> {
pub fn parents(&self) -> DirstateParents {
use crate::Node;
let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
.unwrap()
.clone();
let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
.unwrap()
.clone();
DirstateParents { p1, p2 }
}
Simon Sapin
dirstate-v2: Enforce data size read from the docket file...
r48475 pub fn data_size(&self) -> usize {
// This `unwrap` could only panic on a 16-bit CPU
self.header.data_size.get().try_into().unwrap()
}
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 pub fn data_filename(&self) -> String {
String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
}
}
pub fn read_docket(
on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
let (header, uuid) =
DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
let uuid_size = header.uuid_size as usize;
if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
Ok(Docket { header, uuid })
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 } else {
Err(DirstateV2ParseError)
}
}
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 fn read_root<'on_disk>(
on_disk: &'on_disk [u8],
) -> Result<&'on_disk Root, DirstateV2ParseError> {
// Find the `Root` at the end of the given slice
let root_offset = on_disk
.len()
.checked_sub(std::mem::size_of::<Root>())
// A non-empty slice too short is an error
.ok_or(DirstateV2ParseError)?;
let (root, _) = Root::from_bytes(&on_disk[root_offset..])
.map_err(|_| DirstateV2ParseError)?;
Ok(root)
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 pub(super) fn read<'on_disk>(
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 if on_disk.is_empty() {
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 return Ok(DirstateMap::empty(on_disk));
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 let root = read_root(on_disk)?;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 let dirstate_map = DirstateMap {
on_disk,
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 on_disk,
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 root.root_nodes,
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 )?),
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 nodes_with_entry_count: root.nodes_with_entry_count.get(),
nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
ignore_patterns_hash: root.ignore_patterns_hash,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 };
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 Ok(dirstate_map)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
impl Node {
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 pub(super) fn full_path<'on_disk>(
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 &self,
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
read_hg_path(on_disk, self.full_path)
}
pub(super) fn base_name_start<'on_disk>(
&self,
) -> Result<usize, DirstateV2ParseError> {
let start = self.base_name_start.get();
if start < self.full_path.len.get() {
let start = usize::try_from(start)
// u32 -> usize, could only panic on a 16-bit CPU
.expect("dirstate-v2 base_name_start out of bounds");
Ok(start)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 } else {
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 Err(DirstateV2ParseError)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
}
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 pub(super) fn base_name<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
let full_path = self.full_path(on_disk)?;
let base_name_start = self.base_name_start()?;
Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
}
pub(super) fn path<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
Ok(WithBasename::from_raw_parts(
Cow::Borrowed(self.full_path(on_disk)?),
self.base_name_start()?,
))
}
pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
self.copy_source.start.get() != 0
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 pub(super) fn copy_source<'on_disk>(
&self,
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
Ok(if self.has_copy_source() {
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 Some(read_hg_path(on_disk, self.copy_source)?)
} else {
None
})
}
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 pub(super) fn node_data(
&self,
) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
let entry = |state| {
dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
};
match self.state {
b'\0' => Ok(dirstate_map::NodeData::None),
b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
mtime: *self.data.as_timestamp(),
}),
b'n' => Ok(entry(EntryState::Normal)),
b'a' => Ok(entry(EntryState::Added)),
b'r' => Ok(entry(EntryState::Removed)),
b'm' => Ok(entry(EntryState::Merged)),
_ => Err(DirstateV2ParseError),
}
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 }
Simon Sapin
dirstate-v2: Skip readdir in status based on directory mtime...
r48138 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
if self.state == b'd' {
Some(self.data.as_timestamp())
} else {
None
}
}
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 pub(super) fn state(
&self,
) -> Result<Option<EntryState>, DirstateV2ParseError> {
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 match self.state {
b'\0' | b'd' => Ok(None),
b'n' => Ok(Some(EntryState::Normal)),
b'a' => Ok(Some(EntryState::Added)),
b'r' => Ok(Some(EntryState::Removed)),
b'm' => Ok(Some(EntryState::Merged)),
_ => Err(DirstateV2ParseError),
}
}
fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
DirstateEntry {
state,
mode: self.data.mode.get(),
mtime: self.data.mtime.get(),
size: self.data.size.get(),
}
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 }
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 pub(super) fn entry(
&self,
) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 Ok(self
.state()?
.map(|state| self.entry_with_given_state(state)))
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 }
pub(super) fn children<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<&'on_disk [Node], DirstateV2ParseError> {
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 read_nodes(on_disk, self.children)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
pub(super) fn to_in_memory_node<'on_disk>(
&self,
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 Ok(dirstate_map::Node {
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 children: dirstate_map::ChildNodes::OnDisk(
self.children(on_disk)?,
),
copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 data: self.node_data()?,
Simon Sapin
dirstate-tree: Keep a counter of descendant nodes that have an entry...
r48272 descendants_with_entry_count: self
.descendants_with_entry_count
.get(),
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 tracked_descendants_count: self.tracked_descendants_count.get(),
})
}
}
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 impl Entry {
fn from_timestamp(timestamp: Timestamp) -> Self {
// Safety: both types implement the `ByteCast` trait, so we could
// safely use `as_bytes` and `from_bytes` to do this conversion. Using
// `transmute` instead makes the compiler check that the two types
// have the same size, which eliminates the error case of
// `from_bytes`.
unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
}
fn as_timestamp(&self) -> &Timestamp {
// Safety: same as above in `from_timestamp`
unsafe { &*(self as *const Entry as *const Timestamp) }
}
}
Simon Sapin
dirstate-v2: Add --dirs to debugdirstate command...
r48140 impl Timestamp {
pub fn seconds(&self) -> i64 {
self.seconds.get()
}
}
Simon Sapin
dirstate-v2: Skip readdir in status based on directory mtime...
r48138 impl From<SystemTime> for Timestamp {
fn from(system_time: SystemTime) -> Self {
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
Ok(duration) => {
(duration.as_secs() as i64, duration.subsec_nanos())
}
Err(error) => {
let negative = error.duration();
(-(negative.as_secs() as i64), negative.subsec_nanos())
}
};
Timestamp {
seconds: secs.into(),
nanoseconds: nanos.into(),
}
}
}
impl From<&'_ Timestamp> for SystemTime {
fn from(timestamp: &'_ Timestamp) -> Self {
let secs = timestamp.seconds.get();
let nanos = timestamp.nanoseconds.get();
if secs >= 0 {
UNIX_EPOCH + Duration::new(secs as u64, nanos)
} else {
UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
}
}
}
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 fn read_hg_path(
on_disk: &[u8],
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 slice: PathSlice,
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 ) -> Result<&HgPath, DirstateV2ParseError> {
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 fn read_nodes(
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 on_disk: &[u8],
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
read_slice(on_disk, slice.start, slice.len.get())
}
fn read_slice<T, Len>(
on_disk: &[u8],
start: Offset,
len: Len,
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 ) -> Result<&[T], DirstateV2ParseError>
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 where
T: BytesCast,
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 Len: TryInto<usize>,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 {
// Either `usize::MAX` would result in "out of bounds" error since a single
// `&[u8]` cannot occupy the entire addess space.
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 let start = start.get().try_into().unwrap_or(std::usize::MAX);
let len = len.try_into().unwrap_or(std::usize::MAX);
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 on_disk
.get(start..)
.and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
.map(|(slice, _rest)| slice)
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 .ok_or_else(|| DirstateV2ParseError)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 pub(crate) fn for_each_tracked_path<'on_disk>(
on_disk: &'on_disk [u8],
mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 let root = read_root(on_disk)?;
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 fn recur<'on_disk>(
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 nodes: ChildNodes,
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 f: &mut impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 for node in read_nodes(on_disk, nodes)? {
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 if let Some(state) = node.state()? {
if state.is_tracked() {
f(node.full_path(on_disk)?)
}
}
recur(on_disk, node.children, f)?
}
Ok(())
}
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 recur(on_disk, root.root_nodes, &mut f)
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 }
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 /// Returns new data together with whether that data should be appended to the
/// existing data file whose content is at `dirstate_map.on_disk` (true),
/// instead of written to a new data file (false).
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 pub(super) fn write(
dirstate_map: &mut DirstateMap,
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 can_append: bool,
) -> Result<(Vec<u8>, bool), DirstateError> {
let append = can_append && dirstate_map.write_should_append();
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
// This ignores the space for paths, and for nodes without an entry.
// TODO: better estimate? Skip the `Vec` and write to a file directly?
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 let size_guess = std::mem::size_of::<Root>()
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 + std::mem::size_of::<Node>()
* dirstate_map.nodes_with_entry_count as usize;
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 let mut writer = Writer {
dirstate_map,
append,
out: Vec::with_capacity(size_guess),
};
let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 let root = Root {
root_nodes,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
nodes_with_copy_source_count: dirstate_map
.nodes_with_copy_source_count
.into(),
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 };
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 writer.out.extend(root.as_bytes());
Ok((writer.out, append))
}
struct Writer<'dmap, 'on_disk> {
dirstate_map: &'dmap DirstateMap<'on_disk>,
append: bool,
out: Vec<u8>,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 impl Writer<'_, '_> {
fn write_nodes(
&mut self,
nodes: dirstate_map::ChildNodesRef,
) -> Result<ChildNodes, DirstateError> {
// `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
// order. Sort to enable binary search in the written file.
let nodes = nodes.sorted();
let nodes_len = nodes.len();
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 // First accumulate serialized nodes in a `Vec`
let mut on_disk_nodes = Vec::with_capacity(nodes_len);
for node in nodes {
let children =
self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
let full_path = node.full_path(self.dirstate_map.on_disk)?;
let full_path = self.write_path(full_path.as_bytes());
let copy_source = if let Some(source) =
node.copy_source(self.dirstate_map.on_disk)?
{
self.write_path(source.as_bytes())
Simon Sapin
dirstate-v2: Make the dirstate bytes buffer available in more places...
r48127 } else {
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 PathSlice {
Simon Sapin
dirstate-v2: Make the dirstate bytes buffer available in more places...
r48127 start: 0.into(),
len: 0.into(),
}
};
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 on_disk_nodes.push(match node {
NodeRef::InMemory(path, node) => {
let (state, data) = match &node.data {
dirstate_map::NodeData::Entry(entry) => (
entry.state.into(),
Entry {
mode: entry.mode.into(),
mtime: entry.mtime.into(),
size: entry.size.into(),
},
),
dirstate_map::NodeData::CachedDirectory { mtime } => {
(b'd', Entry::from_timestamp(*mtime))
}
dirstate_map::NodeData::None => (
b'\0',
Entry {
mode: 0.into(),
mtime: 0.into(),
size: 0.into(),
},
),
};
Node {
children,
copy_source,
full_path,
base_name_start: u16::try_from(path.base_name_start())
// Could only panic for paths over 64 KiB
.expect("dirstate-v2 path length overflow")
.into(),
descendants_with_entry_count: node
.descendants_with_entry_count
.into(),
tracked_descendants_count: node
.tracked_descendants_count
.into(),
state,
data,
Simon Sapin
dirstate-tree: Add `NodeRef` and `ChildNodesRef` enums...
r48124 }
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 }
NodeRef::OnDisk(node) => Node {
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 children,
copy_source,
full_path,
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 ..*node
},
})
}
// … so we can write them contiguously, after writing everything else
// they refer to.
let start = self.current_offset();
let len = u32::try_from(nodes_len)
// Could only panic with over 4 billion nodes
.expect("dirstate-v2 path length overflow")
.into();
self.out.extend(on_disk_nodes.as_bytes());
Ok(ChildNodes { start, len })
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 fn current_offset(&mut self) -> Offset {
let mut offset = self.out.len();
if self.append {
offset += self.dirstate_map.on_disk.len()
}
u32::try_from(offset)
// Could only panic for a dirstate file larger than 4 GiB
.expect("dirstate-v2 offset overflow")
.into()
}
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
let start = self.current_offset();
let len = u16::try_from(slice.len())
// Could only panic for paths over 64 KiB
.expect("dirstate-v2 path length overflow")
.into();
self.out.extend(slice.as_bytes());
PathSlice { start, len }
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }