##// END OF EJS Templates
tests: drop remaining `py3` conditionals...
tests: drop remaining `py3` conditionals This includes both `#if ... py3` blocks and `(py3 !)` conditional lines.

File last commit:

r53198:79e8118c default
r53235:d4a0dcd0 default
Show More
on_disk.rs
941 lines | 30.6 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-dirstate: merge `dirstate_tree` module into `dirstate`...
r53195 //! The "version 2" disk representation of the dirstate
//!
//! See `mercurial/helptext/internals/dirstate-v2.txt`
use crate::dirstate::dirstate_map::DirstateVersion;
use crate::dirstate::dirstate_map::{
self, DirstateMap, DirstateMapWriteMode, NodeRef,
};
Raphaël Gomès
rust-dirstate: remove star exports...
r53196 use crate::dirstate::entry::{
DirstateEntry, DirstateV2Data, TruncatedTimestamp,
};
Raphaël Gomès
rust-dirstate: merge `dirstate_tree` module into `dirstate`...
r53195 use crate::dirstate::path_with_basename::WithBasename;
use crate::errors::{HgError, IoResultExt};
use crate::repo::Repo;
use crate::requirements::DIRSTATE_TRACKED_HINT_V1;
use crate::utils::hg_path::HgPath;
use crate::DirstateParents;
use bitflags::bitflags;
use bytes_cast::unaligned::{U16Be, U32Be};
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
use rand::Rng;
use std::borrow::Cow;
use std::fmt::Write;
use uuid::Uuid;
use super::dirstate_map::DirstateIdentity;
Raphaël Gomès
rust-lib: move `Dirstate*Error` to the `dirstate` module...
r53198 use super::DirstateError;
Raphaël Gomès
rust-dirstate: merge `dirstate_tree` module into `dirstate`...
r53195
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;
/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;
pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
const TREE_METADATA_SIZE: usize = 44;
const NODE_SIZE: usize = 44;
/// Make sure that size-affecting changes are made knowingly
#[allow(unused)]
fn static_assert_size_of() {
let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
}
// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
marker: [u8; V2_FORMAT_MARKER.len()],
parent_1: [u8; STORED_NODE_ID_BYTES],
parent_2: [u8; STORED_NODE_ID_BYTES],
metadata: TreeMetadata,
/// Counted in bytes
data_size: Size,
uuid_size: u8,
}
pub struct Docket<'on_disk> {
header: &'on_disk DocketHeader,
pub uuid: &'on_disk [u8],
}
/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
pub struct TreeMetadata {
root_nodes: ChildNodes,
nodes_with_entry_count: Size,
nodes_with_copy_source_count: Size,
unreachable_bytes: Size,
unused: [u8; 4],
/// See *Optional hash of ignore patterns* section of
/// `mercurial/helptext/internals/dirstate-v2.txt`
ignore_patterns_hash: IgnorePatternsHash,
}
/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast, Debug)]
#[repr(C)]
pub(super) struct Node {
full_path: PathSlice,
/// In bytes from `self.full_path.start`
base_name_start: PathSize,
copy_source: OptPathSlice,
children: ChildNodes,
pub(super) descendants_with_entry_count: Size,
pub(super) tracked_descendants_count: Size,
flags: U16Be,
size: U32Be,
mtime: PackedTruncatedTimestamp,
}
bitflags! {
#[repr(C)]
struct Flags: u16 {
const WDIR_TRACKED = 1 << 0;
const P1_TRACKED = 1 << 1;
const P2_INFO = 1 << 2;
const MODE_EXEC_PERM = 1 << 3;
const MODE_IS_SYMLINK = 1 << 4;
const HAS_FALLBACK_EXEC = 1 << 5;
const FALLBACK_EXEC = 1 << 6;
const HAS_FALLBACK_SYMLINK = 1 << 7;
const FALLBACK_SYMLINK = 1 << 8;
const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
const HAS_MODE_AND_SIZE = 1 <<10;
const HAS_MTIME = 1 <<11;
const MTIME_SECOND_AMBIGUOUS = 1 << 12;
const DIRECTORY = 1 <<13;
const ALL_UNKNOWN_RECORDED = 1 <<14;
const ALL_IGNORED_RECORDED = 1 <<15;
}
}
/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PackedTruncatedTimestamp {
truncated_seconds: U32Be,
nanoseconds: U32Be,
}
/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;
/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;
/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;
/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct ChildNodes {
start: Offset,
len: Size,
}
/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PathSlice {
start: Offset,
len: PathSize,
}
/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
message: String,
}
impl DirstateV2ParseError {
pub fn new<S: Into<String>>(message: S) -> Self {
Self {
message: message.into(),
}
}
}
impl From<DirstateV2ParseError> for HgError {
fn from(e: DirstateV2ParseError) -> Self {
HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
}
}
Raphaël Gomès
rust-lib: move `Dirstate*Error` to the `dirstate` module...
r53198 impl From<DirstateV2ParseError> for DirstateError {
Raphaël Gomès
rust-dirstate: merge `dirstate_tree` module into `dirstate`...
r53195 fn from(error: DirstateV2ParseError) -> Self {
HgError::from(error).into()
}
}
impl TreeMetadata {
pub fn as_bytes(&self) -> &[u8] {
BytesCast::as_bytes(self)
}
}
impl<'on_disk> Docket<'on_disk> {
/// Generate the identifier for a new data file
///
/// TODO: support the `HGTEST_UUIDFILE` environment variable.
/// See `mercurial/revlogutils/docket.py`
pub fn new_uid() -> String {
const ID_LENGTH: usize = 8;
let mut id = String::with_capacity(ID_LENGTH);
let mut rng = rand::thread_rng();
for _ in 0..ID_LENGTH {
// One random hexadecimal digit.
// `unwrap` never panics because `impl Write for String`
// never returns an error.
write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
}
id
}
pub fn serialize(
parents: DirstateParents,
tree_metadata: TreeMetadata,
data_size: u64,
uuid: &[u8],
) -> Result<Vec<u8>, std::num::TryFromIntError> {
let header = DocketHeader {
marker: *V2_FORMAT_MARKER,
parent_1: parents.p1.pad_to_256_bits(),
parent_2: parents.p2.pad_to_256_bits(),
metadata: tree_metadata,
data_size: u32::try_from(data_size)?.into(),
uuid_size: uuid.len().try_into()?,
};
let header = header.as_bytes();
let mut docket = Vec::with_capacity(header.len() + uuid.len());
docket.extend_from_slice(header);
docket.extend_from_slice(uuid);
Ok(docket)
}
pub fn parents(&self) -> DirstateParents {
use crate::Node;
let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
.unwrap();
let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
.unwrap();
DirstateParents { p1, p2 }
}
pub fn tree_metadata(&self) -> &[u8] {
self.header.metadata.as_bytes()
}
pub fn data_size(&self) -> usize {
// This `unwrap` could only panic on a 16-bit CPU
self.header.data_size.get().try_into().unwrap()
}
pub fn data_filename(&self) -> String {
String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
}
}
pub fn read_docket(
on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
DirstateV2ParseError::new(format!("when reading docket, {}", e))
})?;
let uuid_size = header.uuid_size as usize;
if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
Ok(Docket { header, uuid })
} else {
Err(DirstateV2ParseError::new(
"invalid format marker or uuid size",
))
}
}
pub(super) fn read<'on_disk>(
on_disk: &'on_disk [u8],
metadata: &[u8],
uuid: Vec<u8>,
identity: Option<DirstateIdentity>,
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
if on_disk.is_empty() {
let mut map = DirstateMap::empty(on_disk);
map.identity = identity;
map.old_uuid = Some(uuid);
map.dirstate_version = DirstateVersion::V2;
return Ok(map);
}
let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
})?;
let dirstate_map = DirstateMap {
on_disk,
root: dirstate_map::ChildNodes::OnDisk(
read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
e.message = format!("{}, when reading root notes", e.message);
e
})?,
),
nodes_with_entry_count: meta.nodes_with_entry_count.get(),
nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
ignore_patterns_hash: meta.ignore_patterns_hash,
unreachable_bytes: meta.unreachable_bytes.get(),
old_data_size: on_disk.len(),
old_uuid: Some(uuid),
identity,
dirstate_version: DirstateVersion::V2,
write_mode: DirstateMapWriteMode::Auto,
use_tracked_hint: false,
};
Ok(dirstate_map)
}
impl Node {
pub(super) fn full_path<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
read_hg_path(on_disk, self.full_path)
}
pub(super) fn base_name_start(
&self,
) -> Result<usize, DirstateV2ParseError> {
let start = self.base_name_start.get();
if start < self.full_path.len.get() {
let start = usize::from(start);
Ok(start)
} else {
Err(DirstateV2ParseError::new("not enough bytes for base name"))
}
}
pub(super) fn base_name<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
let full_path = self.full_path(on_disk)?;
let base_name_start = self.base_name_start()?;
Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
}
pub(super) fn path<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
Ok(WithBasename::from_raw_parts(
Cow::Borrowed(self.full_path(on_disk)?),
self.base_name_start()?,
))
}
pub(super) fn has_copy_source(&self) -> bool {
self.copy_source.start.get() != 0
}
pub(super) fn copy_source<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
Ok(if self.has_copy_source() {
Some(read_hg_path(on_disk, self.copy_source)?)
} else {
None
})
}
fn flags(&self) -> Flags {
Flags::from_bits_truncate(self.flags.get())
}
fn has_entry(&self) -> bool {
self.flags().intersects(
Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
)
}
pub(super) fn node_data(
&self,
) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
if self.has_entry() {
Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
} else if let Some(mtime) = self.cached_directory_mtime()? {
Ok(dirstate_map::NodeData::CachedDirectory { mtime })
} else {
Ok(dirstate_map::NodeData::None)
}
}
pub(super) fn cached_directory_mtime(
&self,
) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
// For now we do not have code to handle the absence of
// ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
// unset.
if self.flags().contains(Flags::DIRECTORY)
&& self.flags().contains(Flags::HAS_MTIME)
&& self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
{
Ok(Some(self.mtime()?))
} else {
Ok(None)
}
}
fn synthesize_unix_mode(&self) -> u32 {
// Some platforms' libc don't have the same type (MacOS uses i32 here)
#[allow(clippy::unnecessary_cast)]
let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
libc::S_IFLNK as u32
} else {
libc::S_IFREG as u32
};
let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
0o755
} else {
0o644
};
file_type | permissions
}
fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
let mut m: TruncatedTimestamp = self.mtime.try_into()?;
if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
m.second_ambiguous = true;
}
Ok(m)
}
fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
// TODO: convert through raw bits instead?
let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
let p2_info = self.flags().contains(Flags::P2_INFO);
let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
&& !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
{
Some((self.synthesize_unix_mode(), self.size.into()))
} else {
None
};
let mtime = if self.flags().contains(Flags::HAS_MTIME)
&& !self.flags().contains(Flags::DIRECTORY)
&& !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
{
Some(self.mtime()?)
} else {
None
};
let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
{
Some(self.flags().contains(Flags::FALLBACK_EXEC))
} else {
None
};
let fallback_symlink =
if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
} else {
None
};
Ok(DirstateEntry::from_v2_data(DirstateV2Data {
wc_tracked,
p1_tracked,
p2_info,
mode_size,
mtime,
fallback_exec,
fallback_symlink,
}))
}
pub(super) fn entry(
&self,
) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
if self.has_entry() {
Ok(Some(self.assume_entry()?))
} else {
Ok(None)
}
}
pub(super) fn children<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<&'on_disk [Node], DirstateV2ParseError> {
read_nodes(on_disk, self.children)
}
pub(super) fn to_in_memory_node<'on_disk>(
&self,
on_disk: &'on_disk [u8],
) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
Ok(dirstate_map::Node {
children: dirstate_map::ChildNodes::OnDisk(
self.children(on_disk)?,
),
copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
data: self.node_data()?,
descendants_with_entry_count: self
.descendants_with_entry_count
.get(),
tracked_descendants_count: self.tracked_descendants_count.get(),
})
}
fn from_dirstate_entry(
entry: &DirstateEntry,
) -> (Flags, U32Be, PackedTruncatedTimestamp) {
let DirstateV2Data {
wc_tracked,
p1_tracked,
p2_info,
mode_size: mode_size_opt,
mtime: mtime_opt,
fallback_exec,
fallback_symlink,
} = entry.v2_data();
// TODO: convert through raw flag bits instead?
let mut flags = Flags::empty();
flags.set(Flags::WDIR_TRACKED, wc_tracked);
flags.set(Flags::P1_TRACKED, p1_tracked);
flags.set(Flags::P2_INFO, p2_info);
// Some platforms' libc don't have the same type (MacOS uses i32 here)
#[allow(clippy::unnecessary_cast)]
let size = if let Some((m, s)) = mode_size_opt {
let exec_perm = m & (libc::S_IXUSR as u32) != 0;
let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
flags.set(Flags::MODE_EXEC_PERM, exec_perm);
flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
flags.insert(Flags::HAS_MODE_AND_SIZE);
s.into()
} else {
0.into()
};
let mtime = if let Some(m) = mtime_opt {
flags.insert(Flags::HAS_MTIME);
if m.second_ambiguous {
flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
};
m.into()
} else {
PackedTruncatedTimestamp::null()
};
if let Some(f_exec) = fallback_exec {
flags.insert(Flags::HAS_FALLBACK_EXEC);
if f_exec {
flags.insert(Flags::FALLBACK_EXEC);
}
}
if let Some(f_symlink) = fallback_symlink {
flags.insert(Flags::HAS_FALLBACK_SYMLINK);
if f_symlink {
flags.insert(Flags::FALLBACK_SYMLINK);
}
}
(flags, size, mtime)
}
}
fn read_hg_path(
on_disk: &[u8],
slice: PathSlice,
) -> Result<&HgPath, DirstateV2ParseError> {
read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
}
fn read_nodes(
on_disk: &[u8],
slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
read_slice(on_disk, slice.start, slice.len.get())
}
fn read_slice<T, Len>(
on_disk: &[u8],
start: Offset,
len: Len,
) -> Result<&[T], DirstateV2ParseError>
where
T: BytesCast,
Len: TryInto<usize>,
{
// Either `usize::MAX` would result in "out of bounds" error since a single
// `&[u8]` cannot occupy the entire addess space.
let start = start.get().try_into().unwrap_or(usize::MAX);
let len = len.try_into().unwrap_or(usize::MAX);
let bytes = match on_disk.get(start..) {
Some(bytes) => bytes,
None => {
return Err(DirstateV2ParseError::new(
"not enough bytes from disk",
))
}
};
T::slice_from_bytes(bytes, len)
.map_err(|e| {
DirstateV2ParseError::new(format!("when reading a slice, {}", e))
})
.map(|(slice, _rest)| slice)
}
/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false), and the previous size of data on disk.
pub(super) fn write(
dirstate_map: &DirstateMap,
write_mode: DirstateMapWriteMode,
) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
let append = match write_mode {
DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
DirstateMapWriteMode::ForceNewDataFile => false,
DirstateMapWriteMode::ForceAppend => true,
};
if append {
log::trace!("appending to the dirstate data file");
} else {
log::trace!("creating new dirstate data file");
}
// This ignores the space for paths, and for nodes without an entry.
// TODO: better estimate? Skip the `Vec` and write to a file directly?
let size_guess = std::mem::size_of::<Node>()
* dirstate_map.nodes_with_entry_count as usize;
let mut writer = Writer {
dirstate_map,
append,
out: Vec::with_capacity(size_guess),
};
let root_nodes = dirstate_map.root.as_ref();
for node in root_nodes.iter() {
// Catch some corruptions before we write to disk
let full_path = node.full_path(dirstate_map.on_disk)?;
let base_name = node.base_name(dirstate_map.on_disk)?;
if full_path != base_name {
let explanation = format!(
"Dirstate root node '{}' is not at the root",
full_path
);
return Err(HgError::corrupted(explanation).into());
}
}
let root_nodes = writer.write_nodes(root_nodes)?;
let unreachable_bytes = if append {
dirstate_map.unreachable_bytes
} else {
0
};
let meta = TreeMetadata {
root_nodes,
nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
nodes_with_copy_source_count: dirstate_map
.nodes_with_copy_source_count
.into(),
unreachable_bytes: unreachable_bytes.into(),
unused: [0; 4],
ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
};
Ok((writer.out, meta, append, dirstate_map.old_data_size))
}
struct Writer<'dmap, 'on_disk> {
dirstate_map: &'dmap DirstateMap<'on_disk>,
append: bool,
out: Vec<u8>,
}
impl Writer<'_, '_> {
fn write_nodes(
&mut self,
nodes: dirstate_map::ChildNodesRef,
) -> Result<ChildNodes, DirstateError> {
// Reuse already-written nodes if possible
if self.append {
if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
let start = self.on_disk_offset_of(nodes_slice).expect(
"dirstate-v2 OnDisk nodes not found within on_disk",
);
let len = child_nodes_len_from_usize(nodes_slice.len());
return Ok(ChildNodes { start, len });
}
}
// `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
// undefined iteration order. Sort to enable binary search in the
// written file.
let nodes = nodes.sorted();
let nodes_len = nodes.len();
// First accumulate serialized nodes in a `Vec`
let mut on_disk_nodes = Vec::with_capacity(nodes_len);
for node in nodes {
let children = node.children(self.dirstate_map.on_disk)?;
let full_path = node.full_path(self.dirstate_map.on_disk)?;
self.check_children(&children, full_path)?;
let children = self.write_nodes(children)?;
let full_path = self.write_path(full_path.as_bytes());
let copy_source = if let Some(source) =
node.copy_source(self.dirstate_map.on_disk)?
{
self.write_path(source.as_bytes())
} else {
PathSlice {
start: 0.into(),
len: 0.into(),
}
};
on_disk_nodes.push(match node {
NodeRef::InMemory(path, node) => {
let (flags, size, mtime) = match &node.data {
dirstate_map::NodeData::Entry(entry) => {
Node::from_dirstate_entry(entry)
}
dirstate_map::NodeData::CachedDirectory { mtime } => {
// we currently never set a mtime if unknown file
// are present.
// So if we have a mtime for a directory, we know
// they are no unknown
// files and we
// blindly set ALL_UNKNOWN_RECORDED.
//
// We never set ALL_IGNORED_RECORDED since we
// don't track that case
// currently.
let mut flags = Flags::DIRECTORY
| Flags::HAS_MTIME
| Flags::ALL_UNKNOWN_RECORDED;
if mtime.second_ambiguous {
flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
}
(flags, 0.into(), (*mtime).into())
}
dirstate_map::NodeData::None => (
Flags::DIRECTORY,
0.into(),
PackedTruncatedTimestamp::null(),
),
};
Node {
children,
copy_source,
full_path,
base_name_start: u16::try_from(path.base_name_start())
// Could only panic for paths over 64 KiB
.expect("dirstate-v2 path length overflow")
.into(),
descendants_with_entry_count: node
.descendants_with_entry_count
.into(),
tracked_descendants_count: node
.tracked_descendants_count
.into(),
flags: flags.bits().into(),
size,
mtime,
}
}
NodeRef::OnDisk(node) => Node {
children,
copy_source,
full_path,
..*node
},
})
}
// … so we can write them contiguously, after writing everything else
// they refer to.
let start = self.current_offset();
let len = child_nodes_len_from_usize(nodes_len);
self.out.extend(on_disk_nodes.as_bytes());
Ok(ChildNodes { start, len })
}
/// Catch some dirstate corruptions before writing them to disk
fn check_children(
&mut self,
children: &dirstate_map::ChildNodesRef,
full_path: &HgPath,
) -> Result<(), DirstateError> {
for child in children.iter() {
let child_full_path =
child.full_path(self.dirstate_map.on_disk)?;
let prefix_length = child_full_path.len()
// remove the filename
- child.base_name(self.dirstate_map.on_disk)?.len()
// remove the slash
- 1;
let child_prefix = &child_full_path.as_bytes()[..prefix_length];
if child_prefix != full_path.as_bytes() {
let explanation = format!(
"dirstate child node's path '{}' \
does not start with its parent's path '{}'",
child_full_path, full_path,
);
return Err(HgError::corrupted(explanation).into());
}
}
Ok(())
}
/// If the given slice of items is within `on_disk`, returns its offset
/// from the start of `on_disk`.
fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
where
T: BytesCast,
{
fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
let start = slice.as_ptr() as usize;
let end = start + slice.len();
start..=end
}
let slice_addresses = address_range(slice.as_bytes());
let on_disk_addresses = address_range(self.dirstate_map.on_disk);
if on_disk_addresses.contains(slice_addresses.start())
&& on_disk_addresses.contains(slice_addresses.end())
{
let offset = slice_addresses.start() - on_disk_addresses.start();
Some(offset_from_usize(offset))
} else {
None
}
}
fn current_offset(&mut self) -> Offset {
let mut offset = self.out.len();
if self.append {
offset += self.dirstate_map.on_disk.len()
}
offset_from_usize(offset)
}
fn write_path(&mut self, slice: &[u8]) -> PathSlice {
let len = path_len_from_usize(slice.len());
// Reuse an already-written path if possible
if self.append {
if let Some(start) = self.on_disk_offset_of(slice) {
return PathSlice { start, len };
}
}
let start = self.current_offset();
self.out.extend(slice.as_bytes());
PathSlice { start, len }
}
}
fn offset_from_usize(x: usize) -> Offset {
u32::try_from(x)
// Could only panic for a dirstate file larger than 4 GiB
.expect("dirstate-v2 offset overflow")
.into()
}
fn child_nodes_len_from_usize(x: usize) -> Size {
u32::try_from(x)
// Could only panic with over 4 billion nodes
.expect("dirstate-v2 slice length overflow")
.into()
}
fn path_len_from_usize(x: usize) -> PathSize {
u16::try_from(x)
// Could only panic for paths over 64 KiB
.expect("dirstate-v2 path length overflow")
.into()
}
impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
fn from(timestamp: TruncatedTimestamp) -> Self {
Self {
truncated_seconds: timestamp.truncated_seconds().into(),
nanoseconds: timestamp.nanoseconds().into(),
}
}
}
impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
type Error = DirstateV2ParseError;
fn try_from(
timestamp: PackedTruncatedTimestamp,
) -> Result<Self, Self::Error> {
Self::from_already_truncated(
timestamp.truncated_seconds.get(),
timestamp.nanoseconds.get(),
false,
)
}
}
impl PackedTruncatedTimestamp {
fn null() -> Self {
Self {
truncated_seconds: 0.into(),
nanoseconds: 0.into(),
}
}
}
/// Write a new tracked key to disk.
/// See `format.use-dirstate-tracked-hint` config help for more details.
pub fn write_tracked_key(repo: &Repo) -> Result<(), HgError> {
// TODO move this to the dirstate itself once it grows a `dirty` flag and
// can reason about which context it needs to write this in.
// For now, only this fast-path needs to think about the tracked hint.
// Use [`crate::dirstate::dirstate_map::DirstateMap::
// use_tracked_hint`] instead of looking at the requirements once
// refactored.
if !repo.requirements().contains(DIRSTATE_TRACKED_HINT_V1) {
return Ok(());
}
// TODO use `hg_vfs` once the `InnerRevlog` is in.
let path = repo
.working_directory_path()
.join(".hg/dirstate-tracked-hint");
std::fs::write(&path, Uuid::new_v4().as_bytes()).when_writing_file(&path)
}