##// END OF EJS Templates
dirstate: add a concept of "fallback" flags to dirstate item...
dirstate: add a concept of "fallback" flags to dirstate item The concept is defined and "used" by the flag code, but it is neither persisted nor set anywhere yet. We currently focus on defining the semantic of the attribute. More to come in the next changesets Check the inline documentation for details. Differential Revision: https://phab.mercurial-scm.org/D11686

File last commit:

r49067:dfc5a505 default
r49068:602c8e84 default
Show More
on_disk.rs
748 lines | 23.9 KiB | application/rls-services+xml | RustLexer
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 //! The "version 2" disk representation of the dirstate
//!
Simon Sapin
dirstate-v2: Add internal documentation...
r48978 //! See `mercurial/helptext/internals/dirstate-v2.txt`
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Truncate directory mtimes to 31 bits of seconds...
r49007 use crate::dirstate::TruncatedTimestamp;
Simon Sapin
dirstate-tree: Add `NodeRef` and `ChildNodesRef` enums...
r48124 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 use crate::dirstate_tree::path_with_basename::WithBasename;
use crate::errors::HgError;
use crate::utils::hg_path::HgPath;
use crate::DirstateEntry;
use crate::DirstateError;
use crate::DirstateParents;
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 use bitflags::bitflags;
Simon Sapin
dirstate-v2: Store unsigned integers inside DirstateEntry...
r49008 use bytes_cast::unaligned::{U16Be, U32Be};
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 use bytes_cast::BytesCast;
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 use format_bytes::format_bytes;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 use std::borrow::Cow;
Simon Sapin
dirstate-v2: Enforce data size read from the docket file...
r48475 use std::convert::{TryFrom, TryInto};
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Change the on-disk format when the requirement is enabled...
r48055 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 /// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
Simon Sapin
dirstate-v2: Change the on-disk format when the requirement is enabled...
r48055 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 /// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;
/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
Simon Sapin
dirstate-v2: initial Python parser...
r49035 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
Simon Sapin
dirstate-v2: Reserve a few bytes of space for future extensions...
r48484 const TREE_METADATA_SIZE: usize = 44;
Simon Sapin
dirstate-v2: Extend node flags to 16 bits...
r49045 const NODE_SIZE: usize = 44;
Simon Sapin
dirstate-v2: Name a constant in the Rust implementation...
r49010
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 /// Make sure that size-affecting changes are made knowingly
#[allow(unused)]
fn static_assert_size_of() {
let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
Simon Sapin
dirstate-v2: Reserve a few bytes of space for future extensions...
r48484 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
Simon Sapin
dirstate-v2: Name a constant in the Rust implementation...
r49010 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 }
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
marker: [u8; V2_FORMAT_MARKER.len()],
parent_1: [u8; STORED_NODE_ID_BYTES],
parent_2: [u8; STORED_NODE_ID_BYTES],
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477
Simon Sapin
dirstate-v2: Move data file info in the docket closer together...
r48977 metadata: TreeMetadata,
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 /// Counted in bytes
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 data_size: Size,
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 uuid_size: u8,
}
/// Borrowed view of a docket, as produced by `read_docket`: the fixed-size
/// header plus the bytes that follow it.
pub struct Docket<'on_disk> {
    /// Fixed-size header, borrowed from the on-disk bytes.
    header: &'on_disk DocketHeader,
    /// Bytes following the header; `data_filename` appends them to
    /// `dirstate.` to name the data file.
    uuid: &'on_disk [u8],
}
Simon Sapin
dirstate-v2: Document flags/mode/size/mtime fields of tree nodes...
r49002 /// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 #[derive(BytesCast)]
#[repr(C)]
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 struct TreeMetadata {
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 root_nodes: ChildNodes,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 nodes_with_entry_count: Size,
nodes_with_copy_source_count: Size,
Simon Sapin
dirstate-v2: Add heuristic for when to create a new data file...
r48481 unreachable_bytes: Size,
Simon Sapin
dirstate-v2: Reserve a few bytes of space for future extensions...
r48484 unused: [u8; 4],
Simon Sapin
dirstate-v2: Document flags/mode/size/mtime fields of tree nodes...
r49002 /// See *Optional hash of ignore patterns* section of
/// `mercurial/helptext/internals/dirstate-v2.txt`
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 ignore_patterns_hash: IgnorePatternsHash,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Document flags/mode/size/mtime fields of tree nodes...
r49002 /// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 #[derive(BytesCast)]
#[repr(C)]
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 pub(super) struct Node {
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 full_path: PathSlice,
/// In bytes from `self.full_path.start`
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 base_name_start: PathSize,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
copy_source: OptPathSlice,
children: ChildNodes,
Simon Sapin
dirstate-tree: Keep a counter of descendant nodes that have an entry...
r48272 pub(super) descendants_with_entry_count: Size,
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 pub(super) tracked_descendants_count: Size,
Simon Sapin
dirstate-v2: Extend node flags to 16 bits...
r49045 flags: U16Be,
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 size: U32Be,
mtime: PackedTruncatedTimestamp,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 bitflags! {
#[repr(C)]
Simon Sapin
dirstate-v2: Extend node flags to 16 bits...
r49045 struct Flags: u16 {
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 const WDIR_TRACKED = 1 << 0;
const P1_TRACKED = 1 << 1;
const P2_INFO = 1 << 2;
const HAS_MODE_AND_SIZE = 1 << 3;
Simon Sapin
dirstate-v2: Separate HAS_FILE_MTIME and HAS_DIRECTORY_MTIME flags...
r49046 const HAS_FILE_MTIME = 1 << 4;
const HAS_DIRECTORY_MTIME = 1 << 5;
const MODE_EXEC_PERM = 1 << 6;
const MODE_IS_SYMLINK = 1 << 7;
Simon Sapin
dirstate-v2: adds a flag to mark a file as modified...
r49066 const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
dirstate-v2: adds two flag to track the presence of some unrecorded files...
r49067 const ALL_UNKNOWN_RECORDED = 1 << 9;
const ALL_IGNORED_RECORDED = 1 << 10;
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 }
}
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 /// Duration since the Unix epoch
Simon Sapin
dirstate-v2: Separate Rust structs for Timestamp and PackedTimestamp...
r49006 #[derive(BytesCast, Copy, Clone)]
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 #[repr(C)]
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 struct PackedTruncatedTimestamp {
Simon Sapin
dirstate-v2: Truncate directory mtimes to 31 bits of seconds...
r49007 truncated_seconds: U32Be,
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 nanoseconds: U32Be,
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 /// Counted in bytes from the start of the file
///
Simon Sapin
dirstate-v2: Use 32-bit integers instead of 64-bit for offsets...
r48270 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
/// Counted in number of items
///
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 type Size = U32Be;
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 /// Counted in bytes
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 ///
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 #[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
start: Offset,
len: Size,
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
/// A `HgPath` of `len` bytes
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 #[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
start: Offset,
len: PathSize,
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 type OptPathSlice = PathSlice;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
///
/// The type carries no data: every parse failure in this module maps to the
/// same value.
#[derive(Debug)]
pub struct DirstateV2ParseError;
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125
impl From<DirstateV2ParseError> for HgError {
fn from(_: DirstateV2ParseError) -> Self {
HgError::corrupted("dirstate-v2 parse error")
}
}
impl From<DirstateV2ParseError> for crate::DirstateError {
fn from(error: DirstateV2ParseError) -> Self {
HgError::from(error).into()
}
}
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 impl<'on_disk> Docket<'on_disk> {
pub fn parents(&self) -> DirstateParents {
use crate::Node;
let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
.unwrap()
.clone();
let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
.unwrap()
.clone();
DirstateParents { p1, p2 }
}
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 pub fn tree_metadata(&self) -> &[u8] {
self.header.metadata.as_bytes()
}
Simon Sapin
dirstate-v2: Enforce data size read from the docket file...
r48475 pub fn data_size(&self) -> usize {
// This `unwrap` could only panic on a 16-bit CPU
self.header.data_size.get().try_into().unwrap()
}
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 pub fn data_filename(&self) -> String {
Simon Sapin
dirstate-v2: Remove the `.d` suffix in data file names...
r48780 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 }
}
pub fn read_docket(
on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
let (header, uuid) =
DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
let uuid_size = header.uuid_size as usize;
if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
Ok(Docket { header, uuid })
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 } else {
Err(DirstateV2ParseError)
}
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 pub(super) fn read<'on_disk>(
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 metadata: &[u8],
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 if on_disk.is_empty() {
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 return Ok(DirstateMap::empty(on_disk));
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 let (meta, _) = TreeMetadata::from_bytes(metadata)
.map_err(|_| DirstateV2ParseError)?;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 let dirstate_map = DirstateMap {
on_disk,
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 on_disk,
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 meta.root_nodes,
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 )?),
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
ignore_patterns_hash: meta.ignore_patterns_hash,
unreachable_bytes: meta.unreachable_bytes.get(),
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 };
Simon Sapin
dirstate-v2: Introduce a docket file...
r48474 Ok(dirstate_map)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
impl Node {
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 pub(super) fn full_path<'on_disk>(
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 &self,
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
read_hg_path(on_disk, self.full_path)
}
pub(super) fn base_name_start<'on_disk>(
&self,
) -> Result<usize, DirstateV2ParseError> {
let start = self.base_name_start.get();
if start < self.full_path.len.get() {
let start = usize::try_from(start)
// u32 -> usize, could only panic on a 16-bit CPU
.expect("dirstate-v2 base_name_start out of bounds");
Ok(start)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 } else {
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 Err(DirstateV2ParseError)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
}
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 pub(super) fn base_name<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
let full_path = self.full_path(on_disk)?;
let base_name_start = self.base_name_start()?;
Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
}
/// Returns the key of this node: its full path borrowed from `on_disk`,
/// paired with the offset of its base name.
pub(super) fn path<'on_disk>(
    &self,
    on_disk: &'on_disk [u8],
) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
    let borrowed_path = Cow::Borrowed(self.full_path(on_disk)?);
    let name_offset = self.base_name_start()?;
    Ok(WithBasename::from_raw_parts(borrowed_path, name_offset))
}
/// Tells whether a copy source is recorded, which is encoded as a non-zero
/// `start` in the `copy_source` slice.
pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
    let source_start = self.copy_source.start.get();
    source_start != 0
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 pub(super) fn copy_source<'on_disk>(
&self,
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
Ok(if self.has_copy_source() {
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 Some(read_hg_path(on_disk, self.copy_source)?)
} else {
None
})
}
Simon Sapin
dirstate-v2: Extend node flags to 16 bits...
r49045 fn flags(&self) -> Flags {
Flags::from_bits_truncate(self.flags.get())
}
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 fn has_entry(&self) -> bool {
Simon Sapin
dirstate-v2: Extend node flags to 16 bits...
r49045 self.flags().intersects(
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
)
}
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 pub(super) fn node_data(
&self,
) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 if self.has_entry() {
Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
Simon Sapin
dirstate-v2: Truncate directory mtimes to 31 bits of seconds...
r49007 } else if let Some(mtime) = self.cached_directory_mtime()? {
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
} else {
Ok(dirstate_map::NodeData::None)
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 }
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 }
Simon Sapin
dirstate-v2: Truncate directory mtimes to 31 bits of seconds...
r49007 pub(super) fn cached_directory_mtime(
&self,
) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
dirstate-v2: adds two flag to track the presence of some unrecorded files...
r49067 // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we
// ignore the mtime if the flag is set.
if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
&& self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
{
Simon Sapin
dirstate-v2: Separate HAS_FILE_MTIME and HAS_DIRECTORY_MTIME flags...
r49046 if self.flags().contains(Flags::HAS_FILE_MTIME) {
Err(DirstateV2ParseError)
Simon Sapin
dirstate-v2: Truncate directory mtimes to 31 bits of seconds...
r49007 } else {
Simon Sapin
dirstate-v2: Separate HAS_FILE_MTIME and HAS_DIRECTORY_MTIME flags...
r49046 Ok(Some(self.mtime.try_into()?))
}
} else {
Ok(None)
}
Simon Sapin
dirstate-v2: Skip readdir in status based on directory mtime...
r48138 }
Simon Sapin
dirstate-v2: Replace the 32-bit `mode` field with two bits...
r49009 fn synthesize_unix_mode(&self) -> u32 {
Simon Sapin
dirstate-v2: Extend node flags to 16 bits...
r49045 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
Simon Sapin
dirstate-v2: Replace the 32-bit `mode` field with two bits...
r49009 libc::S_IFLNK
} else {
libc::S_IFREG
};
Simon Sapin
dirstate-v2: Extend node flags to 16 bits...
r49045 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
Simon Sapin
dirstate-v2: Replace the 32-bit `mode` field with two bits...
r49009 0o755
} else {
0o644
};
file_type | permisions
}
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 fn assume_entry(&self) -> DirstateEntry {
// TODO: convert through raw bits instead?
Simon Sapin
dirstate-v2: Extend node flags to 16 bits...
r49045 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
let p2_info = self.flags().contains(Flags::P2_INFO);
Simon Sapin
dirstate-v2: adds a flag to mark a file as modified...
r49066 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
&& !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
{
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 Some((self.synthesize_unix_mode(), self.size.into()))
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 } else {
None
};
Simon Sapin
dirstate-v2: adds a flag to mark a file as modified...
r49066 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
&& !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
{
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 Some(self.mtime.truncated_seconds.into())
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 } else {
None
};
DirstateEntry::from_v2_data(
wdir_tracked,
p1_tracked,
p2_info,
mode_size,
mtime,
Simon Sapin
rust: Make the fields of DirstateEntry private...
r48834 )
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 }
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 pub(super) fn entry(
&self,
) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 if self.has_entry() {
Ok(Some(self.assume_entry()))
} else {
Ok(None)
}
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 }
pub(super) fn children<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<&'on_disk [Node], DirstateV2ParseError> {
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 read_nodes(on_disk, self.children)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
pub(super) fn to_in_memory_node<'on_disk>(
&self,
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 Ok(dirstate_map::Node {
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 children: dirstate_map::ChildNodes::OnDisk(
self.children(on_disk)?,
),
copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 data: self.node_data()?,
Simon Sapin
dirstate-tree: Keep a counter of descendant nodes that have an entry...
r48272 descendants_with_entry_count: self
.descendants_with_entry_count
.get(),
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 tracked_descendants_count: self.tracked_descendants_count.get(),
})
}
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 fn from_dirstate_entry(
entry: &DirstateEntry,
) -> (Flags, U32Be, PackedTruncatedTimestamp) {
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
entry.v2_data();
// TODO: convert throug raw flag bits instead?
let mut flags = Flags::empty();
flags.set(Flags::WDIR_TRACKED, wdir_tracked);
flags.set(Flags::P1_TRACKED, p1_tracked);
flags.set(Flags::P2_INFO, p2_info);
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 let size = if let Some((m, s)) = mode_size_opt {
Simon Sapin
dirstate-v2: Replace the 32-bit `mode` field with two bits...
r49009 let exec_perm = m & libc::S_IXUSR != 0;
let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
flags.set(Flags::MODE_EXEC_PERM, exec_perm);
flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 flags.insert(Flags::HAS_MODE_AND_SIZE);
s.into()
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 } else {
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 0.into()
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 };
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 let mtime = if let Some(m) = mtime_opt {
Simon Sapin
dirstate-v2: Separate HAS_FILE_MTIME and HAS_DIRECTORY_MTIME flags...
r49046 flags.insert(Flags::HAS_FILE_MTIME);
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 PackedTruncatedTimestamp {
truncated_seconds: m.into(),
nanoseconds: 0.into(),
}
} else {
PackedTruncatedTimestamp::null()
Simon Sapin
dirstate-v2: Separate Rust structs for Timestamp and PackedTimestamp...
r49006 };
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 (flags, size, mtime)
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 }
}
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 fn read_hg_path(
on_disk: &[u8],
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 slice: PathSlice,
Simon Sapin
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
r48128 ) -> Result<&HgPath, DirstateV2ParseError> {
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 fn read_nodes(
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 on_disk: &[u8],
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
read_slice(on_disk, slice.start, slice.len.get())
}
fn read_slice<T, Len>(
on_disk: &[u8],
start: Offset,
len: Len,
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 ) -> Result<&[T], DirstateV2ParseError>
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 where
T: BytesCast,
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 Len: TryInto<usize>,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 {
// Either `usize::MAX` would result in "out of bounds" error since a single
// `&[u8]` cannot occupy the entire addess space.
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 let start = start.get().try_into().unwrap_or(std::usize::MAX);
let len = len.try_into().unwrap_or(std::usize::MAX);
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 on_disk
.get(start..)
.and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
.map(|(slice, _rest)| slice)
Simon Sapin
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
r48125 .ok_or_else(|| DirstateV2ParseError)
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 pub(crate) fn for_each_tracked_path<'on_disk>(
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 metadata: &[u8],
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 let (meta, _) = TreeMetadata::from_bytes(metadata)
.map_err(|_| DirstateV2ParseError)?;
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 fn recur<'on_disk>(
on_disk: &'on_disk [u8],
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 nodes: ChildNodes,
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 f: &mut impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 for node in read_nodes(on_disk, nodes)? {
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 if let Some(entry) = node.entry()? {
if entry.state().is_tracked() {
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 f(node.full_path(on_disk)?)
}
}
recur(on_disk, node.children, f)?
}
Ok(())
}
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 recur(on_disk, meta.root_nodes, &mut f)
Simon Sapin
rhg: Add support for dirstate-v2...
r48165 }
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 /// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false).
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 pub(super) fn write(
dirstate_map: &mut DirstateMap,
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 can_append: bool,
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 let append = can_append && dirstate_map.write_should_append();
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
// This ignores the space for paths, and for nodes without an entry.
// TODO: better estimate? Skip the `Vec` and write to a file directly?
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 let size_guess = std::mem::size_of::<Node>()
* dirstate_map.nodes_with_entry_count as usize;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 let mut writer = Writer {
dirstate_map,
append,
out: Vec::with_capacity(size_guess),
};
let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 let meta = TreeMetadata {
Simon Sapin
dirstate-v2: Rename Header to Root, move it to the end of the data file...
r48476 root_nodes,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
nodes_with_copy_source_count: dirstate_map
.nodes_with_copy_source_count
.into(),
Simon Sapin
dirstate-v2: Add heuristic for when to create a new data file...
r48481 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
Simon Sapin
dirstate-v2: Reserve a few bytes of space for future extensions...
r48484 unused: [0; 4],
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 };
Simon Sapin
dirstate-v2: Move fixed-size tree metadata into the docket file...
r48482 Ok((writer.out, meta.as_bytes().to_vec(), append))
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 }
struct Writer<'dmap, 'on_disk> {
dirstate_map: &'dmap DirstateMap<'on_disk>,
append: bool,
out: Vec<u8>,
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 impl Writer<'_, '_> {
fn write_nodes(
&mut self,
nodes: dirstate_map::ChildNodesRef,
) -> Result<ChildNodes, DirstateError> {
Simon Sapin
dirstate-v2: Reuse existing nodes when appending to a data file...
r48479 // Reuse already-written nodes if possible
if self.append {
if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
Simon Sapin
dirstate-v2: Reuse existing paths when appending to a data file...
r48480 let start = self.on_disk_offset_of(nodes_slice).expect(
"dirstate-v2 OnDisk nodes not found within on_disk",
);
Simon Sapin
dirstate-v2: Reuse existing nodes when appending to a data file...
r48479 let len = child_nodes_len_from_usize(nodes_slice.len());
return Ok(ChildNodes { start, len });
}
}
// `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
// undefined iteration order. Sort to enable binary search in the
// written file.
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 let nodes = nodes.sorted();
let nodes_len = nodes.len();
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 // First accumulate serialized nodes in a `Vec`
let mut on_disk_nodes = Vec::with_capacity(nodes_len);
for node in nodes {
let children =
self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
let full_path = node.full_path(self.dirstate_map.on_disk)?;
let full_path = self.write_path(full_path.as_bytes());
let copy_source = if let Some(source) =
node.copy_source(self.dirstate_map.on_disk)?
{
self.write_path(source.as_bytes())
Simon Sapin
dirstate-v2: Make the dirstate bytes buffer available in more places...
r48127 } else {
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477 PathSlice {
Simon Sapin
dirstate-v2: Make the dirstate bytes buffer available in more places...
r48127 start: 0.into(),
len: 0.into(),
}
};
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 on_disk_nodes.push(match node {
NodeRef::InMemory(path, node) => {
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 let (flags, size, mtime) = match &node.data {
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 dirstate_map::NodeData::Entry(entry) => {
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 Node::from_dirstate_entry(entry)
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 }
Simon Sapin
dirstate-v2: Separate HAS_FILE_MTIME and HAS_DIRECTORY_MTIME flags...
r49046 dirstate_map::NodeData::CachedDirectory { mtime } => (
dirstate-v2: adds two flag to track the presence of some unrecorded files...
r49067 // we currently never set a mtime if unknown file
// are present.
// So if we have a mtime for a directory, we know
// they are no unknown
// files and we
// blindly set ALL_UNKNOWN_RECORDED.
//
// We never set ALL_IGNORED_RECORDED since we
// don't track that case
// currently.
Flags::HAS_DIRECTORY_MTIME
| Flags::ALL_UNKNOWN_RECORDED,
Simon Sapin
dirstate-v2: Separate HAS_FILE_MTIME and HAS_DIRECTORY_MTIME flags...
r49046 0.into(),
(*mtime).into(),
),
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 dirstate_map::NodeData::None => (
Simon Sapin
dirstate-v2: Store a bitfield on disk instead of v1-like state...
r48951 Flags::empty(),
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 0.into(),
PackedTruncatedTimestamp::null(),
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 ),
};
Node {
children,
copy_source,
full_path,
base_name_start: u16::try_from(path.base_name_start())
// Could only panic for paths over 64 KiB
.expect("dirstate-v2 path length overflow")
.into(),
descendants_with_entry_count: node
.descendants_with_entry_count
.into(),
tracked_descendants_count: node
.tracked_descendants_count
.into(),
Simon Sapin
dirstate-v2: Extend node flags to 16 bits...
r49045 flags: flags.bits().into(),
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033 size,
mtime,
Simon Sapin
dirstate-tree: Add `NodeRef` and `ChildNodesRef` enums...
r48124 }
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 }
NodeRef::OnDisk(node) => Node {
Simon Sapin
dirstate-v2: Allow tree nodes without an entry to store a timestamp...
r48137 children,
copy_source,
full_path,
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 ..*node
},
})
}
// … so we can write them contiguously, after writing everything else
// they refer to.
let start = self.current_offset();
Simon Sapin
dirstate-v2: Reuse existing nodes when appending to a data file...
r48479 let len = child_nodes_len_from_usize(nodes_len);
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 self.out.extend(on_disk_nodes.as_bytes());
Ok(ChildNodes { start, len })
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Reuse existing paths when appending to a data file...
r48480 /// If the given slice of items is within `on_disk`, returns its offset
/// from the start of `on_disk`.
fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
Simon Sapin
dirstate-v2: Reuse existing nodes when appending to a data file...
r48479 where
T: BytesCast,
{
fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
let start = slice.as_ptr() as usize;
let end = start + slice.len();
start..=end
}
let slice_addresses = address_range(slice.as_bytes());
let on_disk_addresses = address_range(self.dirstate_map.on_disk);
Simon Sapin
dirstate-v2: Reuse existing paths when appending to a data file...
r48480 if on_disk_addresses.contains(slice_addresses.start())
&& on_disk_addresses.contains(slice_addresses.end())
{
let offset = slice_addresses.start() - on_disk_addresses.start();
Some(offset_from_usize(offset))
} else {
None
}
Simon Sapin
dirstate-v2: Reuse existing nodes when appending to a data file...
r48479 }
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 fn current_offset(&mut self) -> Offset {
let mut offset = self.out.len();
if self.append {
offset += self.dirstate_map.on_disk.len()
}
Simon Sapin
dirstate-v2: Reuse existing nodes when appending to a data file...
r48479 offset_from_usize(offset)
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 }
Simon Sapin
dirstate-v2: shrink on-disk path lengths to 16-bits...
r48477
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
Simon Sapin
dirstate-v2: Reuse existing paths when appending to a data file...
r48480 let len = path_len_from_usize(slice.len());
// Reuse an already-written path if possible
if self.append {
if let Some(start) = self.on_disk_offset_of(slice) {
return PathSlice { start, len };
}
}
Simon Sapin
dirstate-v2: Support appending to the same data file...
r48478 let start = self.current_offset();
self.out.extend(slice.as_bytes());
PathSlice { start, len }
}
Simon Sapin
dirstate-v2: Change the on-disk format to be tree-shaped...
r48058 }
Simon Sapin
dirstate-v2: Reuse existing nodes when appending to a data file...
r48479
/// Converts an in-memory byte offset to its on-disk representation.
///
/// # Panics
///
/// Panics if `x` does not fit in 32 bits.
fn offset_from_usize(x: usize) -> Offset {
    // Could only panic for a dirstate file larger than 4 GiB
    let narrowed = u32::try_from(x).expect("dirstate-v2 offset overflow");
    narrowed.into()
}
/// Converts a number of sibling nodes to its on-disk representation.
///
/// # Panics
///
/// Panics if `x` does not fit in 32 bits.
fn child_nodes_len_from_usize(x: usize) -> Size {
    // Could only panic with over 4 billion nodes
    let narrowed =
        u32::try_from(x).expect("dirstate-v2 slice length overflow");
    narrowed.into()
}
/// Converts the length of a path in bytes to its on-disk representation.
///
/// # Panics
///
/// Panics if `x` does not fit in 16 bits.
fn path_len_from_usize(x: usize) -> PathSize {
    // Could only panic for paths over 64 KiB
    let narrowed =
        u16::try_from(x).expect("dirstate-v2 path length overflow");
    narrowed.into()
}
Simon Sapin
dirstate-v2: Add storage space for nanoseconds precision in file mtimes...
r49033
impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
    /// Packs the seconds and nanoseconds components of `timestamp` into
    /// their on-disk representation.
    fn from(timestamp: TruncatedTimestamp) -> Self {
        let truncated_seconds = timestamp.truncated_seconds().into();
        let nanoseconds = timestamp.nanoseconds().into();
        Self {
            truncated_seconds,
            nanoseconds,
        }
    }
}
impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
    type Error = DirstateV2ParseError;

    /// Unpacks an on-disk timestamp, delegating validation of its
    /// components to `TruncatedTimestamp::from_already_truncated`.
    fn try_from(
        timestamp: PackedTruncatedTimestamp,
    ) -> Result<Self, Self::Error> {
        let seconds = timestamp.truncated_seconds.get();
        let nanoseconds = timestamp.nanoseconds.get();
        Self::from_already_truncated(seconds, nanoseconds)
    }
}
impl PackedTruncatedTimestamp {
    /// Returns a timestamp whose seconds and nanoseconds are both zero.
    fn null() -> Self {
        Self { truncated_seconds: 0.into(), nanoseconds: 0.into() }
    }
}