on_disk.rs
941 lines
| 30.6 KiB
| application/rls-services+xml
|
RustLexer
Raphaël Gomès
|
r53195 | //! The "version 2" disk representation of the dirstate | ||
//! | ||||
//! See `mercurial/helptext/internals/dirstate-v2.txt` | ||||
use crate::dirstate::dirstate_map::DirstateVersion; | ||||
use crate::dirstate::dirstate_map::{ | ||||
self, DirstateMap, DirstateMapWriteMode, NodeRef, | ||||
}; | ||||
Raphaël Gomès
|
r53196 | use crate::dirstate::entry::{ | ||
DirstateEntry, DirstateV2Data, TruncatedTimestamp, | ||||
}; | ||||
Raphaël Gomès
|
r53195 | use crate::dirstate::path_with_basename::WithBasename; | ||
use crate::errors::{HgError, IoResultExt}; | ||||
use crate::repo::Repo; | ||||
use crate::requirements::DIRSTATE_TRACKED_HINT_V1; | ||||
use crate::utils::hg_path::HgPath; | ||||
use crate::DirstateParents; | ||||
use bitflags::bitflags; | ||||
use bytes_cast::unaligned::{U16Be, U32Be}; | ||||
use bytes_cast::BytesCast; | ||||
use format_bytes::format_bytes; | ||||
use rand::Rng; | ||||
use std::borrow::Cow; | ||||
use std::fmt::Write; | ||||
use uuid::Uuid; | ||||
use super::dirstate_map::DirstateIdentity; | ||||
Raphaël Gomès
|
r53198 | use super::DirstateError; | ||
Raphaël Gomès
|
r53195 | |||
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
const TREE_METADATA_SIZE: usize = 44;
const NODE_SIZE: usize = 44;

/// Make sure that size-affecting changes are made knowingly
///
/// `std::mem::transmute` only type-checks when source and destination have
/// the same size, so merely naming these instantiations is a compile-time
/// size assertion; the function is never called.
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
}
// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
//
// The fixed-size header of `.hg/dirstate` in the "v2" format; the uuid of
// the data file follows it (see `Docket`). `repr(C)` + `BytesCast` make the
// struct's in-memory layout the on-disk layout.
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    // Must equal `V2_FORMAT_MARKER`; checked in `read_docket`.
    marker: [u8; V2_FORMAT_MARKER.len()],
    // Parents are stored padded to 256 bits even though only
    // `USED_NODE_ID_BYTES` are meaningful today (SHA-1).
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],
    metadata: TreeMetadata,
    /// Counted in bytes
    data_size: Size,
    // Length of the uuid that trails this header in the docket file.
    uuid_size: u8,
}
/// A parsed view of `.hg/dirstate` in the "v2" format: the fixed-size
/// header plus the uuid bytes that follow it, both borrowed from the
/// on-disk buffer.
pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    pub uuid: &'on_disk [u8],
}
/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
pub struct TreeMetadata {
    // Where to find the root nodes inside the data file.
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,
    // Bytes in the data file no longer reachable after append-mode writes.
    unreachable_bytes: Size,
    unused: [u8; 4],
    /// See *Optional hash of ignore patterns* section of
    /// `mercurial/helptext/internals/dirstate-v2.txt`
    ignore_patterns_hash: IgnorePatternsHash,
}
/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast, Debug)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,
    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,
    // `start == 0` means "no copy source"; see `OptPathSlice`.
    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,
    // Bit set decoded through `Flags::from_bits_truncate` in `Self::flags`.
    flags: U16Be,
    size: U32Be,
    mtime: PackedTruncatedTimestamp,
}
bitflags! {
    /// Per-node flags, stored on disk in the 16-bit `flags` field of
    /// `Node`. Flag semantics are documented in the *The data file format*
    /// section of `mercurial/helptext/internals/dirstate-v2.txt`.
    #[repr(C)]
    struct Flags: u16 {
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        const MODE_EXEC_PERM = 1 << 3;
        const MODE_IS_SYMLINK = 1 << 4;
        const HAS_FALLBACK_EXEC = 1 << 5;
        const FALLBACK_EXEC = 1 << 6;
        const HAS_FALLBACK_SYMLINK = 1 << 7;
        const FALLBACK_SYMLINK = 1 << 8;
        const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
        const HAS_MODE_AND_SIZE = 1 << 10;
        const HAS_MTIME = 1 << 11;
        const MTIME_SECOND_AMBIGUOUS = 1 << 12;
        const DIRECTORY = 1 << 13;
        const ALL_UNKNOWN_RECORDED = 1 << 14;
        const ALL_IGNORED_RECORDED = 1 << 15;
    }
}
/// Duration since the Unix epoch
///
/// On-disk form of `TruncatedTimestamp`: seconds truncated to 32 bits plus
/// a nanoseconds component, both big-endian. The `second_ambiguous` bit
/// lives in the node's flags, not here (see `Node::mtime`).
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PackedTruncatedTimestamp {
    truncated_seconds: U32Be,
    nanoseconds: U32Be,
}
/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;

/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct ChildNodes {
    start: Offset,
    len: Size,
}

/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PathSlice {
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    // Human-readable description of what failed to parse; `read` appends
    // extra context to it when propagating.
    message: String,
}

impl DirstateV2ParseError {
    /// Build a parse error from anything convertible to a `String`
    /// (string literals and `format!` output alike).
    pub fn new<S: Into<String>>(message: S) -> Self {
        Self {
            message: message.into(),
        }
    }
}

impl From<DirstateV2ParseError> for HgError {
    fn from(e: DirstateV2ParseError) -> Self {
        HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
    }
}
Raphaël Gomès
|
r53198 | impl From<DirstateV2ParseError> for DirstateError { | ||
Raphaël Gomès
|
r53195 | fn from(error: DirstateV2ParseError) -> Self { | ||
HgError::from(error).into() | ||||
} | ||||
} | ||||
impl TreeMetadata {
    /// View the metadata in its exact on-disk byte representation
    /// (valid because of `#[derive(BytesCast)]` + `#[repr(C)]`).
    pub fn as_bytes(&self) -> &[u8] {
        BytesCast::as_bytes(self)
    }
}
impl<'on_disk> Docket<'on_disk> {
    /// Generate the identifier for a new data file
    ///
    /// TODO: support the `HGTEST_UUIDFILE` environment variable.
    /// See `mercurial/revlogutils/docket.py`
    pub fn new_uid() -> String {
        const ID_LENGTH: usize = 8;
        let mut id = String::with_capacity(ID_LENGTH);
        let mut rng = rand::thread_rng();
        for _ in 0..ID_LENGTH {
            // One random hexadecimal digit.
            // `unwrap` never panics because `impl Write for String`
            // never returns an error.
            write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
        }
        id
    }

    /// Serialize a complete docket file: the fixed-size header followed by
    /// the data-file uuid.
    ///
    /// Returns `Err` if `data_size` does not fit in 32 bits or the uuid is
    /// longer than 255 bytes (both format limits).
    pub fn serialize(
        parents: DirstateParents,
        tree_metadata: TreeMetadata,
        data_size: u64,
        uuid: &[u8],
    ) -> Result<Vec<u8>, std::num::TryFromIntError> {
        let header = DocketHeader {
            marker: *V2_FORMAT_MARKER,
            parent_1: parents.p1.pad_to_256_bits(),
            parent_2: parents.p2.pad_to_256_bits(),
            metadata: tree_metadata,
            data_size: u32::try_from(data_size)?.into(),
            uuid_size: uuid.len().try_into()?,
        };
        let header = header.as_bytes();
        let mut docket = Vec::with_capacity(header.len() + uuid.len());
        docket.extend_from_slice(header);
        docket.extend_from_slice(uuid);
        Ok(docket)
    }

    /// The working directory parents recorded in the docket.
    ///
    /// Only the first `USED_NODE_ID_BYTES` of each stored (padded) hash are
    /// meaningful; `unwrap` cannot fail since the slice length is fixed.
    pub fn parents(&self) -> DirstateParents {
        use crate::Node;
        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
            .unwrap();
        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
            .unwrap();
        DirstateParents { p1, p2 }
    }

    /// The tree metadata embedded in the docket, as raw bytes.
    pub fn tree_metadata(&self) -> &[u8] {
        self.header.metadata.as_bytes()
    }

    /// Size in bytes of the associated data file.
    pub fn data_size(&self) -> usize {
        // This `unwrap` could only panic on a 16-bit CPU
        self.header.data_size.get().try_into().unwrap()
    }

    /// Name of the data file this docket points to: `dirstate.{uuid}`.
    pub fn data_filename(&self) -> String {
        String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
    }
}
pub fn read_docket( | ||||
on_disk: &[u8], | ||||
) -> Result<Docket<'_>, DirstateV2ParseError> { | ||||
let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| { | ||||
DirstateV2ParseError::new(format!("when reading docket, {}", e)) | ||||
})?; | ||||
let uuid_size = header.uuid_size as usize; | ||||
if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size { | ||||
Ok(Docket { header, uuid }) | ||||
} else { | ||||
Err(DirstateV2ParseError::new( | ||||
"invalid format marker or uuid size", | ||||
)) | ||||
} | ||||
} | ||||
pub(super) fn read<'on_disk>( | ||||
on_disk: &'on_disk [u8], | ||||
metadata: &[u8], | ||||
uuid: Vec<u8>, | ||||
identity: Option<DirstateIdentity>, | ||||
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> { | ||||
if on_disk.is_empty() { | ||||
let mut map = DirstateMap::empty(on_disk); | ||||
map.identity = identity; | ||||
map.old_uuid = Some(uuid); | ||||
map.dirstate_version = DirstateVersion::V2; | ||||
return Ok(map); | ||||
} | ||||
let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| { | ||||
DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e)) | ||||
})?; | ||||
let dirstate_map = DirstateMap { | ||||
on_disk, | ||||
root: dirstate_map::ChildNodes::OnDisk( | ||||
read_nodes(on_disk, meta.root_nodes).map_err(|mut e| { | ||||
e.message = format!("{}, when reading root notes", e.message); | ||||
e | ||||
})?, | ||||
), | ||||
nodes_with_entry_count: meta.nodes_with_entry_count.get(), | ||||
nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(), | ||||
ignore_patterns_hash: meta.ignore_patterns_hash, | ||||
unreachable_bytes: meta.unreachable_bytes.get(), | ||||
old_data_size: on_disk.len(), | ||||
old_uuid: Some(uuid), | ||||
identity, | ||||
dirstate_version: DirstateVersion::V2, | ||||
write_mode: DirstateMapWriteMode::Auto, | ||||
use_tracked_hint: false, | ||||
}; | ||||
Ok(dirstate_map) | ||||
} | ||||
impl Node {
    /// The node's full path from the repository root, read out of `on_disk`.
    pub(super) fn full_path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        read_hg_path(on_disk, self.full_path)
    }

    /// Byte offset of the base name (last path component) inside
    /// `full_path`, validated to fall strictly before the end of the path.
    pub(super) fn base_name_start(
        &self,
    ) -> Result<usize, DirstateV2ParseError> {
        let start = self.base_name_start.get();
        if start < self.full_path.len.get() {
            let start = usize::from(start);
            Ok(start)
        } else {
            Err(DirstateV2ParseError::new("not enough bytes for base name"))
        }
    }

    /// The last component of the node's path (everything after the last
    /// slash), as a sub-slice of `full_path`.
    pub(super) fn base_name<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        let full_path = self.full_path(on_disk)?;
        let base_name_start = self.base_name_start()?;
        Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
    }

    /// The node's path paired with its precomputed base-name offset, in the
    /// form used as a key in the in-memory dirstate map.
    pub(super) fn path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
        Ok(WithBasename::from_raw_parts(
            Cow::Borrowed(self.full_path(on_disk)?),
            self.base_name_start()?,
        ))
    }

    /// Whether a copy source is recorded (`start == 0` means "none", per
    /// the `OptPathSlice` convention).
    pub(super) fn has_copy_source(&self) -> bool {
        self.copy_source.start.get() != 0
    }

    /// The recorded copy source path, if any.
    pub(super) fn copy_source<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
        Ok(if self.has_copy_source() {
            Some(read_hg_path(on_disk, self.copy_source)?)
        } else {
            None
        })
    }

    // Decode the raw 16-bit flags field; unknown bits from newer writers
    // are silently dropped by `from_bits_truncate`.
    fn flags(&self) -> Flags {
        Flags::from_bits_truncate(self.flags.get())
    }

    // A node carries a dirstate entry iff it is relevant in the working
    // directory or either parent.
    fn has_entry(&self) -> bool {
        self.flags().intersects(
            Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
        )
    }

    /// Classify this node for the in-memory representation: a file entry,
    /// a directory with a cached mtime, or nothing.
    pub(super) fn node_data(
        &self,
    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
        if self.has_entry() {
            Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
        } else if let Some(mtime) = self.cached_directory_mtime()? {
            Ok(dirstate_map::NodeData::CachedDirectory { mtime })
        } else {
            Ok(dirstate_map::NodeData::None)
        }
    }

    /// The cached mtime of a directory node, when all three required flags
    /// are set.
    pub(super) fn cached_directory_mtime(
        &self,
    ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
        // For now we do not have code to handle the absence of
        // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
        // unset.
        if self.flags().contains(Flags::DIRECTORY)
            && self.flags().contains(Flags::HAS_MTIME)
            && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
        {
            Ok(Some(self.mtime()?))
        } else {
            Ok(None)
        }
    }

    // Rebuild a Unix `st_mode`-style value from the two mode flags:
    // symlink-vs-regular file type plus 755/644 permissions.
    fn synthesize_unix_mode(&self) -> u32 {
        // Some platforms' libc don't have the same type (MacOS uses i32 here)
        #[allow(clippy::unnecessary_cast)]
        let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
            libc::S_IFLNK as u32
        } else {
            libc::S_IFREG as u32
        };
        let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
            0o755
        } else {
            0o644
        };
        file_type | permissions
    }

    // Decode the packed mtime, restoring the `second_ambiguous` bit that is
    // stored in the flags rather than in the timestamp itself.
    fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
        let mut m: TruncatedTimestamp = self.mtime.try_into()?;
        if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
            m.second_ambiguous = true;
        }
        Ok(m)
    }

    // Build a `DirstateEntry` assuming `has_entry()` already returned true.
    // mode/size and mtime are only trusted when their HAS_* flag is set and
    // the entry is not expected to be modified.
    fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
        // TODO: convert through raw bits instead?
        let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
        let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
        let p2_info = self.flags().contains(Flags::P2_INFO);
        let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
        {
            Some((self.synthesize_unix_mode(), self.size.into()))
        } else {
            None
        };
        let mtime = if self.flags().contains(Flags::HAS_MTIME)
            && !self.flags().contains(Flags::DIRECTORY)
            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
        {
            Some(self.mtime()?)
        } else {
            None
        };
        // The fallback pairs use a HAS_* flag as the `Option` discriminant.
        let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
        {
            Some(self.flags().contains(Flags::FALLBACK_EXEC))
        } else {
            None
        };
        let fallback_symlink =
            if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
                Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
            } else {
                None
            };
        Ok(DirstateEntry::from_v2_data(DirstateV2Data {
            wc_tracked,
            p1_tracked,
            p2_info,
            mode_size,
            mtime,
            fallback_exec,
            fallback_symlink,
        }))
    }

    /// The dirstate entry for this node, or `None` for directory-only /
    /// cache-only nodes.
    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        if self.has_entry() {
            Ok(Some(self.assume_entry()?))
        } else {
            Ok(None)
        }
    }

    /// This node's children, as a slice borrowed from `on_disk`.
    pub(super) fn children<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
        read_nodes(on_disk, self.children)
    }

    /// Convert this on-disk node into the in-memory representation
    /// (children stay lazy: they remain an `OnDisk` slice).
    pub(super) fn to_in_memory_node<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
        Ok(dirstate_map::Node {
            children: dirstate_map::ChildNodes::OnDisk(
                self.children(on_disk)?,
            ),
            copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
            data: self.node_data()?,
            descendants_with_entry_count: self
                .descendants_with_entry_count
                .get(),
            tracked_descendants_count: self.tracked_descendants_count.get(),
        })
    }

    // Inverse of `assume_entry`: encode an in-memory entry into the
    // (flags, size, mtime) triple stored in an on-disk node.
    fn from_dirstate_entry(
        entry: &DirstateEntry,
    ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
        let DirstateV2Data {
            wc_tracked,
            p1_tracked,
            p2_info,
            mode_size: mode_size_opt,
            mtime: mtime_opt,
            fallback_exec,
            fallback_symlink,
        } = entry.v2_data();
        // TODO: convert through raw flag bits instead?
        let mut flags = Flags::empty();
        flags.set(Flags::WDIR_TRACKED, wc_tracked);
        flags.set(Flags::P1_TRACKED, p1_tracked);
        flags.set(Flags::P2_INFO, p2_info);
        // Some platforms' libc don't have the same type (MacOS uses i32 here)
        #[allow(clippy::unnecessary_cast)]
        let size = if let Some((m, s)) = mode_size_opt {
            let exec_perm = m & (libc::S_IXUSR as u32) != 0;
            let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
            flags.set(Flags::MODE_EXEC_PERM, exec_perm);
            flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
            flags.insert(Flags::HAS_MODE_AND_SIZE);
            s.into()
        } else {
            // Absent mode/size is encoded as zero; HAS_MODE_AND_SIZE stays
            // unset so readers will ignore this value.
            0.into()
        };
        let mtime = if let Some(m) = mtime_opt {
            flags.insert(Flags::HAS_MTIME);
            if m.second_ambiguous {
                flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
            };
            m.into()
        } else {
            PackedTruncatedTimestamp::null()
        };
        if let Some(f_exec) = fallback_exec {
            flags.insert(Flags::HAS_FALLBACK_EXEC);
            if f_exec {
                flags.insert(Flags::FALLBACK_EXEC);
            }
        }
        if let Some(f_symlink) = fallback_symlink {
            flags.insert(Flags::HAS_FALLBACK_SYMLINK);
            if f_symlink {
                flags.insert(Flags::FALLBACK_SYMLINK);
            }
        }
        (flags, size, mtime)
    }
}
fn read_hg_path( | ||||
on_disk: &[u8], | ||||
slice: PathSlice, | ||||
) -> Result<&HgPath, DirstateV2ParseError> { | ||||
read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new) | ||||
} | ||||
fn read_nodes( | ||||
on_disk: &[u8], | ||||
slice: ChildNodes, | ||||
) -> Result<&[Node], DirstateV2ParseError> { | ||||
read_slice(on_disk, slice.start, slice.len.get()) | ||||
} | ||||
fn read_slice<T, Len>( | ||||
on_disk: &[u8], | ||||
start: Offset, | ||||
len: Len, | ||||
) -> Result<&[T], DirstateV2ParseError> | ||||
where | ||||
T: BytesCast, | ||||
Len: TryInto<usize>, | ||||
{ | ||||
// Either `usize::MAX` would result in "out of bounds" error since a single | ||||
// `&[u8]` cannot occupy the entire addess space. | ||||
let start = start.get().try_into().unwrap_or(usize::MAX); | ||||
let len = len.try_into().unwrap_or(usize::MAX); | ||||
let bytes = match on_disk.get(start..) { | ||||
Some(bytes) => bytes, | ||||
None => { | ||||
return Err(DirstateV2ParseError::new( | ||||
"not enough bytes from disk", | ||||
)) | ||||
} | ||||
}; | ||||
T::slice_from_bytes(bytes, len) | ||||
.map_err(|e| { | ||||
DirstateV2ParseError::new(format!("when reading a slice, {}", e)) | ||||
}) | ||||
.map(|(slice, _rest)| slice) | ||||
} | ||||
/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false), and the previous size of data on disk.
pub(super) fn write(
    dirstate_map: &DirstateMap,
    write_mode: DirstateMapWriteMode,
) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
    // `Auto` lets the map decide; the two `Force*` modes override it.
    let append = match write_mode {
        DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
        DirstateMapWriteMode::ForceNewDataFile => false,
        DirstateMapWriteMode::ForceAppend => true,
    };
    if append {
        log::trace!("appending to the dirstate data file");
    } else {
        log::trace!("creating new dirstate data file");
    }

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Node>()
        * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    let root_nodes = dirstate_map.root.as_ref();
    for node in root_nodes.iter() {
        // Catch some corruptions before we write to disk:
        // a root node's path must be a single component, i.e. its full path
        // equals its base name.
        let full_path = node.full_path(dirstate_map.on_disk)?;
        let base_name = node.base_name(dirstate_map.on_disk)?;
        if full_path != base_name {
            let explanation = format!(
                "Dirstate root node '{}' is not at the root",
                full_path
            );
            return Err(HgError::corrupted(explanation).into());
        }
    }
    let root_nodes = writer.write_nodes(root_nodes)?;

    // When starting a fresh data file nothing written is unreachable yet.
    let unreachable_bytes = if append {
        dirstate_map.unreachable_bytes
    } else {
        0
    };
    let meta = TreeMetadata {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        unreachable_bytes: unreachable_bytes.into(),
        unused: [0; 4],
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    Ok((writer.out, meta, append, dirstate_map.old_data_size))
}
/// Serialization state for one dirstate-v2 write: the map being written,
/// whether we are appending to the existing data file (which allows reusing
/// already-written nodes and paths), and the output buffer.
struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    append: bool,
    out: Vec<u8>,
}
impl Writer<'_, '_> {
    /// Serialize `nodes` (recursively, children first) into `self.out` and
    /// return the `ChildNodes` descriptor that locates them.
    ///
    /// In append mode, nodes still backed by the existing on-disk buffer
    /// are not re-serialized: their current offset is reused.
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }
        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            let children = node.children(self.dirstate_map.on_disk)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            self.check_children(&children, full_path)?;

            // Recurse first so children land before their parent node.
            let children = self.write_nodes(children)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                // `start == 0` encodes "no copy source" (`OptPathSlice`).
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (flags, size, mtime) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => {
                            Node::from_dirstate_entry(entry)
                        }
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            // we currently never set a mtime if unknown file
                            // are present.
                            // So if we have a mtime for a directory, we know
                            // they are no unknown
                            // files and we
                            // blindly set ALL_UNKNOWN_RECORDED.
                            //
                            // We never set ALL_IGNORED_RECORDED since we
                            // don't track that case
                            // currently.
                            let mut flags = Flags::DIRECTORY
                                | Flags::HAS_MTIME
                                | Flags::ALL_UNKNOWN_RECORDED;
                            if mtime.second_ambiguous {
                                flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
                            }
                            (flags, 0.into(), (*mtime).into())
                        }
                        dirstate_map::NodeData::None => (
                            Flags::DIRECTORY,
                            0.into(),
                            PackedTruncatedTimestamp::null(),
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        flags: flags.bits().into(),
                        size,
                        mtime,
                    }
                }
                // An untouched on-disk node keeps all of its scalar fields;
                // only the slice descriptors are rewritten.
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// Catch some dirstate corruptions before writing them to disk:
    /// every child's path must be `<parent path>/<base name>`.
    fn check_children(
        &mut self,
        children: &dirstate_map::ChildNodesRef,
        full_path: &HgPath,
    ) -> Result<(), DirstateError> {
        for child in children.iter() {
            let child_full_path =
                child.full_path(self.dirstate_map.on_disk)?;

            let prefix_length = child_full_path.len()
                // remove the filename
                - child.base_name(self.dirstate_map.on_disk)?.len()
                // remove the slash
                - 1;

            let child_prefix = &child_full_path.as_bytes()[..prefix_length];

            if child_prefix != full_path.as_bytes() {
                let explanation = format!(
                    "dirstate child node's path '{}' \
                     does not start with its parent's path '{}'",
                    child_full_path, full_path,
                );

                return Err(HgError::corrupted(explanation).into());
            }
        }
        Ok(())
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        // Inclusive range so that an empty slice at the very end of
        // `on_disk` is still considered contained.
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

    // Offset that the next bytes pushed to `self.out` will have in the
    // final data file (accounting for the existing file when appending).
    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    /// Serialize a path into `self.out` (or, in append mode, reuse its
    /// existing location in `on_disk`) and return its descriptor.
    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}
fn offset_from_usize(x: usize) -> Offset { | ||||
u32::try_from(x) | ||||
// Could only panic for a dirstate file larger than 4 GiB | ||||
.expect("dirstate-v2 offset overflow") | ||||
.into() | ||||
} | ||||
fn child_nodes_len_from_usize(x: usize) -> Size { | ||||
u32::try_from(x) | ||||
// Could only panic with over 4 billion nodes | ||||
.expect("dirstate-v2 slice length overflow") | ||||
.into() | ||||
} | ||||
fn path_len_from_usize(x: usize) -> PathSize { | ||||
u16::try_from(x) | ||||
// Could only panic for paths over 64 KiB | ||||
.expect("dirstate-v2 path length overflow") | ||||
.into() | ||||
} | ||||
impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
    /// Pack a timestamp for on-disk storage. The `second_ambiguous` bit is
    /// intentionally not stored here: the writer records it in the node's
    /// flags (`MTIME_SECOND_AMBIGUOUS`) instead.
    fn from(timestamp: TruncatedTimestamp) -> Self {
        Self {
            truncated_seconds: timestamp.truncated_seconds().into(),
            nanoseconds: timestamp.nanoseconds().into(),
        }
    }
}
impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
    type Error = DirstateV2ParseError;
    /// Unpack an on-disk timestamp. `second_ambiguous` starts out `false`
    /// here; `Node::mtime` restores it from the node's flags afterwards.
    fn try_from(
        timestamp: PackedTruncatedTimestamp,
    ) -> Result<Self, Self::Error> {
        Self::from_already_truncated(
            timestamp.truncated_seconds.get(),
            timestamp.nanoseconds.get(),
            false,
        )
    }
}
impl PackedTruncatedTimestamp { | ||||
fn null() -> Self { | ||||
Self { | ||||
truncated_seconds: 0.into(), | ||||
nanoseconds: 0.into(), | ||||
} | ||||
} | ||||
} | ||||
/// Write a new tracked key to disk.
/// See `format.use-dirstate-tracked-hint` config help for more details.
///
/// No-op when the repository does not have the tracked-hint requirement;
/// otherwise overwrites `.hg/dirstate-tracked-hint` with a fresh random
/// UUID so observers can tell the tracked set may have changed.
pub fn write_tracked_key(repo: &Repo) -> Result<(), HgError> {
    // TODO move this to the dirstate itself once it grows a `dirty` flag and
    // can reason about which context it needs to write this in.
    // For now, only this fast-path needs to think about the tracked hint.
    // Use [`crate::dirstate::dirstate_map::DirstateMap::
    // use_tracked_hint`] instead of looking at the requirements once
    // refactored.
    if !repo.requirements().contains(DIRSTATE_TRACKED_HINT_V1) {
        return Ok(());
    }
    // TODO use `hg_vfs` once the `InnerRevlog` is in.
    let path = repo
        .working_directory_path()
        .join(".hg/dirstate-tracked-hint");
    std::fs::write(&path, Uuid::new_v4().as_bytes()).when_writing_file(&path)
}