//! The "version 2" disk representation of the dirstate
//!
//! See `mercurial/helptext/internals/dirstate-v2.txt`

use crate::dirstate::dirstate_map::DirstateVersion;
use crate::dirstate::dirstate_map::{
    self, DirstateMap, DirstateMapWriteMode, NodeRef,
};
use crate::dirstate::entry::{
    DirstateEntry, DirstateV2Data, TruncatedTimestamp,
};
use crate::dirstate::path_with_basename::WithBasename;
use crate::errors::{HgError, IoResultExt};
use crate::repo::Repo;
use crate::requirements::DIRSTATE_TRACKED_HINT_V1;
use crate::utils::hg_path::HgPath;
use crate::DirstateParents;
use bitflags::bitflags;
use bytes_cast::unaligned::{U16Be, U32Be};
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
use rand::Rng;
use std::borrow::Cow;
use std::fmt::Write;
use uuid::Uuid;

use super::dirstate_map::DirstateIdentity;
use super::DirstateError;

/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
const TREE_METADATA_SIZE: usize = 44;
const NODE_SIZE: usize = 44;

/// Make sure that size-affecting changes are made knowingly
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
}

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    marker: [u8; V2_FORMAT_MARKER.len()],
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],

    metadata: TreeMetadata,

    /// Counted in bytes
    data_size: Size,

    uuid_size: u8,
}

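/// A parsed `.hg/dirstate` docket: the header borrowed from the on-disk
/// bytes, plus the UUID that identifies the separate data file holding the
/// actual nodes (see `Docket::data_filename`).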
pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    pub uuid: &'on_disk [u8],
}

/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
pub struct TreeMetadata {
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,
    unreachable_bytes: Size,
    unused: [u8; 4],

    /// See *Optional hash of ignore patterns* section of
    /// `mercurial/helptext/internals/dirstate-v2.txt`
    ignore_patterns_hash: IgnorePatternsHash,
}

/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast, Debug)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,
    flags: U16Be,
    size: U32Be,
    mtime: PackedTruncatedTimestamp,
}

bitflags! {
    #[repr(C)]
    struct Flags: u16 {
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        const MODE_EXEC_PERM = 1 << 3;
        const MODE_IS_SYMLINK = 1 << 4;
        const HAS_FALLBACK_EXEC = 1 << 5;
        const FALLBACK_EXEC = 1 << 6;
        const HAS_FALLBACK_SYMLINK = 1 << 7;
        const FALLBACK_SYMLINK = 1 << 8;
        const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
        const HAS_MODE_AND_SIZE = 1 << 10;
        const HAS_MTIME = 1 << 11;
        const MTIME_SECOND_AMBIGUOUS = 1 << 12;
        const DIRECTORY = 1 << 13;
        const ALL_UNKNOWN_RECORDED = 1 << 14;
        const ALL_IGNORED_RECORDED = 1 << 15;
    }
}

/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PackedTruncatedTimestamp {
    truncated_seconds: U32Be,
    nanoseconds: U32Be,
}

/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;

/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct ChildNodes {
    start: Offset,
    len: Size,
}

/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PathSlice {
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;

/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    message: String,
}

impl DirstateV2ParseError {
    pub fn new<S: Into<String>>(message: S) -> Self {
        Self {
            message: message.into(),
        }
    }
}

impl From<DirstateV2ParseError> for HgError {
    fn from(e: DirstateV2ParseError) -> Self {
        HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
    }
}

impl From<DirstateV2ParseError> for DirstateError {
    fn from(error: DirstateV2ParseError) -> Self {
        HgError::from(error).into()
    }
}

impl TreeMetadata {
    pub fn as_bytes(&self) -> &[u8] {
        BytesCast::as_bytes(self)
    }
}

impl<'on_disk> Docket<'on_disk> {
    /// Generate the identifier for a new data file
    ///
    /// TODO: support the `HGTEST_UUIDFILE` environment variable.
    /// See `mercurial/revlogutils/docket.py`
    pub fn new_uid() -> String {
        const ID_LENGTH: usize = 8;
        let mut id = String::with_capacity(ID_LENGTH);
        let mut rng = rand::thread_rng();
        for _ in 0..ID_LENGTH {
            // One random hexadecimal digit.
            // `unwrap` never panics because `impl Write for String`
            // never returns an error.
            write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
        }
        id
    }

    pub fn serialize(
        parents: DirstateParents,
        tree_metadata: TreeMetadata,
        data_size: u64,
        uuid: &[u8],
    ) -> Result<Vec<u8>, std::num::TryFromIntError> {
        let header = DocketHeader {
            marker: *V2_FORMAT_MARKER,
            parent_1: parents.p1.pad_to_256_bits(),
            parent_2: parents.p2.pad_to_256_bits(),
            metadata: tree_metadata,
            data_size: u32::try_from(data_size)?.into(),
            uuid_size: uuid.len().try_into()?,
        };
        let header = header.as_bytes();
        let mut docket = Vec::with_capacity(header.len() + uuid.len());
        docket.extend_from_slice(header);
        docket.extend_from_slice(uuid);
        Ok(docket)
    }

    pub fn parents(&self) -> DirstateParents {
        use crate::Node;
        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
            .unwrap();
        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
            .unwrap();
        DirstateParents { p1, p2 }
    }

    pub fn tree_metadata(&self) -> &[u8] {
        self.header.metadata.as_bytes()
    }

    pub fn data_size(&self) -> usize {
        // This `unwrap` could only panic on a 16-bit CPU
        self.header.data_size.get().try_into().unwrap()
    }

    pub fn data_filename(&self) -> String {
        String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
    }
}

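/// Parse the docket from the full contents of `.hg/dirstate`, checking the
/// format marker and that the trailing bytes match the UUID size recorded in
/// the header.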
pub fn read_docket(
    on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
    let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
        DirstateV2ParseError::new(format!("when reading docket, {}", e))
    })?;
    let uuid_size = header.uuid_size as usize;
    if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
        Ok(Docket { header, uuid })
    } else {
        Err(DirstateV2ParseError::new(
            "invalid format marker or uuid size",
        ))
    }
}

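/// Build a `DirstateMap` from the data file contents (`on_disk`) and the
/// tree metadata stored in the docket. An empty data file yields an empty
/// map.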
pub(super) fn read<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
    uuid: Vec<u8>,
    identity: Option<DirstateIdentity>,
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
    if on_disk.is_empty() {
        let mut map = DirstateMap::empty(on_disk);
        map.identity = identity;
        map.old_uuid = Some(uuid);
        map.dirstate_version = DirstateVersion::V2;
        return Ok(map);
    }
    let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
        DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
    })?;
    let dirstate_map = DirstateMap {
        on_disk,
        root: dirstate_map::ChildNodes::OnDisk(
            read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
                e.message = format!("{}, when reading root nodes", e.message);
                e
            })?,
        ),
        nodes_with_entry_count: meta.nodes_with_entry_count.get(),
        nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
        ignore_patterns_hash: meta.ignore_patterns_hash,
        unreachable_bytes: meta.unreachable_bytes.get(),
        old_data_size: on_disk.len(),
        old_uuid: Some(uuid),
        identity,
        dirstate_version: DirstateVersion::V2,
        write_mode: DirstateMapWriteMode::Auto,
        use_tracked_hint: false,
    };
    Ok(dirstate_map)
}

impl Node {
    pub(super) fn full_path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        read_hg_path(on_disk, self.full_path)
    }

    pub(super) fn base_name_start(
        &self,
    ) -> Result<usize, DirstateV2ParseError> {
        let start = self.base_name_start.get();
        if start < self.full_path.len.get() {
            let start = usize::from(start);
            Ok(start)
        } else {
            Err(DirstateV2ParseError::new("not enough bytes for base name"))
        }
    }

    pub(super) fn base_name<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        let full_path = self.full_path(on_disk)?;
        let base_name_start = self.base_name_start()?;
        Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
    }

    pub(super) fn path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
        Ok(WithBasename::from_raw_parts(
            Cow::Borrowed(self.full_path(on_disk)?),
            self.base_name_start()?,
        ))
    }

    pub(super) fn has_copy_source(&self) -> bool {
        self.copy_source.start.get() != 0
    }

    pub(super) fn copy_source<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
        Ok(if self.has_copy_source() {
            Some(read_hg_path(on_disk, self.copy_source)?)
        } else {
            None
        })
    }

    fn flags(&self) -> Flags {
        Flags::from_bits_truncate(self.flags.get())
    }

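    /// Whether this node carries a dirstate entry of its own (any of the
    /// tracking flags is set), as opposed to only existing as a directory or
    /// cached-directory node.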
    fn has_entry(&self) -> bool {
        self.flags().intersects(
            Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
        )
    }

    pub(super) fn node_data(
        &self,
    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
        if self.has_entry() {
            Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
        } else if let Some(mtime) = self.cached_directory_mtime()? {
            Ok(dirstate_map::NodeData::CachedDirectory { mtime })
        } else {
            Ok(dirstate_map::NodeData::None)
        }
    }

    pub(super) fn cached_directory_mtime(
        &self,
    ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
        // For now we do not have code to handle the absence of
        // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
        // unset.
        if self.flags().contains(Flags::DIRECTORY)
            && self.flags().contains(Flags::HAS_MTIME)
            && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
        {
            Ok(Some(self.mtime()?))
        } else {
            Ok(None)
        }
    }

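    /// Reconstruct a Unix-style `st_mode` value from the flags: the file
    /// type from `MODE_IS_SYMLINK`, the permission bits from
    /// `MODE_EXEC_PERM`.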
    fn synthesize_unix_mode(&self) -> u32 {
        // Some platforms' libc don't have the same type (MacOS uses i32 here)
        #[allow(clippy::unnecessary_cast)]
        let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
            libc::S_IFLNK as u32
        } else {
            libc::S_IFREG as u32
        };
        let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
            0o755
        } else {
            0o644
        };
        file_type | permissions
    }

    fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
        let mut m: TruncatedTimestamp = self.mtime.try_into()?;
        if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
            m.second_ambiguous = true;
        }
        Ok(m)
    }

    fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
        // TODO: convert through raw bits instead?
        let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
        let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
        let p2_info = self.flags().contains(Flags::P2_INFO);
        let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
        {
            Some((self.synthesize_unix_mode(), self.size.into()))
        } else {
            None
        };
        let mtime = if self.flags().contains(Flags::HAS_MTIME)
            && !self.flags().contains(Flags::DIRECTORY)
            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
        {
            Some(self.mtime()?)
        } else {
            None
        };
        let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
        {
            Some(self.flags().contains(Flags::FALLBACK_EXEC))
        } else {
            None
        };
        let fallback_symlink =
            if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
                Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
            } else {
                None
            };
        Ok(DirstateEntry::from_v2_data(DirstateV2Data {
            wc_tracked,
            p1_tracked,
            p2_info,
            mode_size,
            mtime,
            fallback_exec,
            fallback_symlink,
        }))
    }

    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        if self.has_entry() {
            Ok(Some(self.assume_entry()?))
        } else {
            Ok(None)
        }
    }

    pub(super) fn children<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
        read_nodes(on_disk, self.children)
    }

    pub(super) fn to_in_memory_node<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
        Ok(dirstate_map::Node {
            children: dirstate_map::ChildNodes::OnDisk(
                self.children(on_disk)?,
            ),
            copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
            data: self.node_data()?,
            descendants_with_entry_count: self
                .descendants_with_entry_count
                .get(),
            tracked_descendants_count: self.tracked_descendants_count.get(),
        })
    }

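    /// Compute the on-disk `flags`, `size` and `mtime` fields from an
    /// in-memory `DirstateEntry`.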
    fn from_dirstate_entry(
        entry: &DirstateEntry,
    ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
        let DirstateV2Data {
            wc_tracked,
            p1_tracked,
            p2_info,
            mode_size: mode_size_opt,
            mtime: mtime_opt,
            fallback_exec,
            fallback_symlink,
        } = entry.v2_data();
        // TODO: convert through raw flag bits instead?
        let mut flags = Flags::empty();
        flags.set(Flags::WDIR_TRACKED, wc_tracked);
        flags.set(Flags::P1_TRACKED, p1_tracked);
        flags.set(Flags::P2_INFO, p2_info);
        // Some platforms' libc don't have the same type (MacOS uses i32 here)
        #[allow(clippy::unnecessary_cast)]
        let size = if let Some((m, s)) = mode_size_opt {
            let exec_perm = m & (libc::S_IXUSR as u32) != 0;
            let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
            flags.set(Flags::MODE_EXEC_PERM, exec_perm);
            flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
            flags.insert(Flags::HAS_MODE_AND_SIZE);
            s.into()
        } else {
            0.into()
        };
        let mtime = if let Some(m) = mtime_opt {
            flags.insert(Flags::HAS_MTIME);
            if m.second_ambiguous {
                flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
            };
            m.into()
        } else {
            PackedTruncatedTimestamp::null()
        };
        if let Some(f_exec) = fallback_exec {
            flags.insert(Flags::HAS_FALLBACK_EXEC);
            if f_exec {
                flags.insert(Flags::FALLBACK_EXEC);
            }
        }
        if let Some(f_symlink) = fallback_symlink {
            flags.insert(Flags::HAS_FALLBACK_SYMLINK);
            if f_symlink {
                flags.insert(Flags::FALLBACK_SYMLINK);
            }
        }
        (flags, size, mtime)
    }
}

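/// Resolve a `PathSlice` into the `HgPath` it points to within `on_disk`.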
fn read_hg_path(
    on_disk: &[u8],
    slice: PathSlice,
) -> Result<&HgPath, DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
}

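/// Resolve a `ChildNodes` descriptor into the slice of `Node`s it points to
/// within `on_disk`.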
fn read_nodes(
    on_disk: &[u8],
    slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get())
}

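/// Borrow `len` items of type `T` starting at byte offset `start` of
/// `on_disk`, without copying.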
fn read_slice<T, Len>(
    on_disk: &[u8],
    start: Offset,
    len: Len,
) -> Result<&[T], DirstateV2ParseError>
where
    T: BytesCast,
    Len: TryInto<usize>,
{
    // If either conversion overflows, falling back to `usize::MAX` still
    // results in an "out of bounds" error, since a single `&[u8]` cannot
    // occupy the entire address space.
    let start = start.get().try_into().unwrap_or(usize::MAX);
    let len = len.try_into().unwrap_or(usize::MAX);
    let bytes = match on_disk.get(start..) {
        Some(bytes) => bytes,
        None => {
            return Err(DirstateV2ParseError::new(
                "not enough bytes from disk",
            ))
        }
    };
    T::slice_from_bytes(bytes, len)
        .map_err(|e| {
            DirstateV2ParseError::new(format!("when reading a slice, {}", e))
        })
        .map(|(slice, _rest)| slice)
}

/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false), and the previous size of data on disk.
pub(super) fn write(
    dirstate_map: &DirstateMap,
    write_mode: DirstateMapWriteMode,
) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
    let append = match write_mode {
        DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
        DirstateMapWriteMode::ForceNewDataFile => false,
        DirstateMapWriteMode::ForceAppend => true,
    };
    if append {
        log::trace!("appending to the dirstate data file");
    } else {
        log::trace!("creating new dirstate data file");
    }

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Node>()
        * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    let root_nodes = dirstate_map.root.as_ref();
    for node in root_nodes.iter() {
        // Catch some corruptions before we write to disk
        let full_path = node.full_path(dirstate_map.on_disk)?;
        let base_name = node.base_name(dirstate_map.on_disk)?;
        if full_path != base_name {
            let explanation = format!(
                "Dirstate root node '{}' is not at the root",
                full_path
            );
            return Err(HgError::corrupted(explanation).into());
        }
    }
    let root_nodes = writer.write_nodes(root_nodes)?;

    let unreachable_bytes = if append {
        dirstate_map.unreachable_bytes
    } else {
        0
    };
    let meta = TreeMetadata {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        unreachable_bytes: unreachable_bytes.into(),
        unused: [0; 4],
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    Ok((writer.out, meta, append, dirstate_map.old_data_size))
}

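/// Serialization state for `write`: accumulates the bytes of the new data
/// file in `out`, and when `append` is true reuses nodes and paths already
/// present in `dirstate_map.on_disk` instead of copying them.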
struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    append: bool,
    out: Vec<u8>,
}

impl Writer<'_, '_> {
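    /// Serialize a set of sibling nodes and, recursively, everything they
    /// refer to, returning the offset and length of the contiguous block
    /// where the siblings end up.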
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            let children = node.children(self.dirstate_map.on_disk)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            self.check_children(&children, full_path)?;

            let children = self.write_nodes(children)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (flags, size, mtime) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => {
                            Node::from_dirstate_entry(entry)
                        }
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            // We currently never set a mtime if unknown
                            // files are present, so if we have a mtime for
                            // a directory, we know there are no unknown
                            // files and we blindly set
                            // ALL_UNKNOWN_RECORDED.
                            //
                            // We never set ALL_IGNORED_RECORDED since we
                            // don't track that case currently.
                            let mut flags = Flags::DIRECTORY
                                | Flags::HAS_MTIME
                                | Flags::ALL_UNKNOWN_RECORDED;
                            if mtime.second_ambiguous {
                                flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
                            }
                            (flags, 0.into(), (*mtime).into())
                        }
                        dirstate_map::NodeData::None => (
                            Flags::DIRECTORY,
                            0.into(),
                            PackedTruncatedTimestamp::null(),
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        flags: flags.bits().into(),
                        size,
                        mtime,
                    }
                }
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// Catch some dirstate corruptions before writing them to disk
    fn check_children(
        &mut self,
        children: &dirstate_map::ChildNodesRef,
        full_path: &HgPath,
    ) -> Result<(), DirstateError> {
        for child in children.iter() {
            let child_full_path =
                child.full_path(self.dirstate_map.on_disk)?;

            let prefix_length = child_full_path.len()
                // remove the filename
                - child.base_name(self.dirstate_map.on_disk)?.len()
                // remove the slash
                - 1;

            let child_prefix = &child_full_path.as_bytes()[..prefix_length];

            if child_prefix != full_path.as_bytes() {
                let explanation = format!(
                    "dirstate child node's path '{}' \
                    does not start with its parent's path '{}'",
                    child_full_path, full_path,
                );

                return Err(HgError::corrupted(explanation).into());
            }
        }
        Ok(())
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

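    /// Offset that the next written byte will have in the final data file,
    /// accounting for the existing file contents when appending.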
    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

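    /// Write a path to `out` (or reuse one already present in the existing
    /// data file when appending) and return its slice descriptor.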
    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}

fn offset_from_usize(x: usize) -> Offset {
    u32::try_from(x)
        // Could only panic for a dirstate file larger than 4 GiB
        .expect("dirstate-v2 offset overflow")
        .into()
}

fn child_nodes_len_from_usize(x: usize) -> Size {
    u32::try_from(x)
        // Could only panic with over 4 billion nodes
        .expect("dirstate-v2 slice length overflow")
        .into()
}

fn path_len_from_usize(x: usize) -> PathSize {
    u16::try_from(x)
        // Could only panic for paths over 64 KiB
        .expect("dirstate-v2 path length overflow")
        .into()
}

impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
    fn from(timestamp: TruncatedTimestamp) -> Self {
        Self {
            truncated_seconds: timestamp.truncated_seconds().into(),
            nanoseconds: timestamp.nanoseconds().into(),
        }
    }
}

impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
    type Error = DirstateV2ParseError;

    fn try_from(
        timestamp: PackedTruncatedTimestamp,
    ) -> Result<Self, Self::Error> {
        Self::from_already_truncated(
            timestamp.truncated_seconds.get(),
            timestamp.nanoseconds.get(),
            false,
        )
    }
}

impl PackedTruncatedTimestamp {
    fn null() -> Self {
        Self {
            truncated_seconds: 0.into(),
            nanoseconds: 0.into(),
        }
    }
}

/// Write a new tracked key to disk.
/// See `format.use-dirstate-tracked-hint` config help for more details.
pub fn write_tracked_key(repo: &Repo) -> Result<(), HgError> {
    // TODO move this to the dirstate itself once it grows a `dirty` flag and
    // can reason about which context it needs to write this in.
    // For now, only this fast-path needs to think about the tracked hint.
    // Use [`crate::dirstate::dirstate_map::DirstateMap::use_tracked_hint`]
    // instead of looking at the requirements once refactored.
    if !repo.requirements().contains(DIRSTATE_TRACKED_HINT_V1) {
        return Ok(());
    }
    // TODO use `hg_vfs` once the `InnerRevlog` is in.
    let path = repo
        .working_directory_path()
        .join(".hg/dirstate-tracked-hint");
    std::fs::write(&path, Uuid::new_v4().as_bytes()).when_writing_file(&path)
}