on_disk.rs
877 lines
| 28.3 KiB
| application/rls-services+xml
|
RustLexer
Simon Sapin
|
r48058 | //! The "version 2" disk representation of the dirstate | ||
//! | ||||
Simon Sapin
|
r48978 | //! See `mercurial/helptext/internals/dirstate-v2.txt` | ||
Simon Sapin
|
r48058 | |||
Raphaël Gomès
|
r49991 | use crate::dirstate::{DirstateV2Data, TruncatedTimestamp}; | ||
Raphaël Gomès
|
r50232 | use crate::dirstate_tree::dirstate_map::DirstateVersion; | ||
Simon Sapin
|
r48124 | use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef}; | ||
Simon Sapin
|
r48058 | use crate::dirstate_tree::path_with_basename::WithBasename; | ||
use crate::errors::HgError; | ||||
use crate::utils::hg_path::HgPath; | ||||
use crate::DirstateEntry; | ||||
use crate::DirstateError; | ||||
use crate::DirstateParents; | ||||
Simon Sapin
|
r48951 | use bitflags::bitflags; | ||
Simon Sapin
|
r49008 | use bytes_cast::unaligned::{U16Be, U32Be}; | ||
Simon Sapin
|
r48058 | use bytes_cast::BytesCast; | ||
Simon Sapin
|
r48474 | use format_bytes::format_bytes; | ||
Simon Sapin
|
r49249 | use rand::Rng; | ||
Simon Sapin
|
r48058 | use std::borrow::Cow; | ||
Simon Sapin
|
r49249 | use std::fmt::Write; | ||
Simon Sapin
|
r48058 | |||
Simon Sapin
|
r48055 | /// Added at the start of `.hg/dirstate` when the "v2" format is used. | ||
Simon Sapin
|
r48058 | /// This a redundant sanity check more than an actual "magic number" since | ||
/// `.hg/requires` already governs which format should be used. | ||||
Simon Sapin
|
r48055 | pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n"; | ||
Simon Sapin
|
r48058 | |||
Simon Sapin
|
r48474 | /// Keep space for 256-bit hashes | ||
const STORED_NODE_ID_BYTES: usize = 32; | ||||
/// … even though only 160 bits are used for now, with SHA-1 | ||||
const USED_NODE_ID_BYTES: usize = 20; | ||||
Simon Sapin
|
r48202 | pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20; | ||
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN]; | ||||
Simon Sapin
|
r49035 | /// Must match constants of the same names in `mercurial/dirstateutils/v2.py` | ||
Simon Sapin
|
r48484 | const TREE_METADATA_SIZE: usize = 44; | ||
Simon Sapin
|
r49045 | const NODE_SIZE: usize = 44; | ||
Simon Sapin
|
r49010 | |||
Simon Sapin
|
r48482 | /// Make sure that size-affecting changes are made knowingly | ||
#[allow(unused)] | ||||
fn static_assert_size_of() { | ||||
let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>; | ||||
Simon Sapin
|
r48484 | let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>; | ||
Simon Sapin
|
r49010 | let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>; | ||
Simon Sapin
|
r48482 | } | ||
Simon Sapin
|
r48474 | // Must match `HEADER` in `mercurial/dirstateutils/docket.py` | ||
#[derive(BytesCast)] | ||||
#[repr(C)] | ||||
struct DocketHeader { | ||||
marker: [u8; V2_FORMAT_MARKER.len()], | ||||
parent_1: [u8; STORED_NODE_ID_BYTES], | ||||
parent_2: [u8; STORED_NODE_ID_BYTES], | ||||
Simon Sapin
|
r48477 | |||
Simon Sapin
|
r48977 | metadata: TreeMetadata, | ||
Simon Sapin
|
r48477 | /// Counted in bytes | ||
Simon Sapin
|
r48474 | data_size: Size, | ||
Simon Sapin
|
r48477 | |||
Simon Sapin
|
r48474 | uuid_size: u8, | ||
} | ||||
pub struct Docket<'on_disk> { | ||||
header: &'on_disk DocketHeader, | ||||
Simon Sapin
|
r49248 | pub uuid: &'on_disk [u8], | ||
Simon Sapin
|
r48474 | } | ||
Simon Sapin
|
r49002 | /// Fields are documented in the *Tree metadata in the docket file* | ||
/// section of `mercurial/helptext/internals/dirstate-v2.txt` | ||||
Simon Sapin
|
r48058 | #[derive(BytesCast)] | ||
#[repr(C)] | ||||
Simon Sapin
|
r49249 | pub struct TreeMetadata { | ||
Simon Sapin
|
r48476 | root_nodes: ChildNodes, | ||
Simon Sapin
|
r48058 | nodes_with_entry_count: Size, | ||
nodes_with_copy_source_count: Size, | ||||
Simon Sapin
|
r48481 | unreachable_bytes: Size, | ||
Simon Sapin
|
r48484 | unused: [u8; 4], | ||
Simon Sapin
|
r49002 | /// See *Optional hash of ignore patterns* section of | ||
/// `mercurial/helptext/internals/dirstate-v2.txt` | ||||
Simon Sapin
|
r48202 | ignore_patterns_hash: IgnorePatternsHash, | ||
Simon Sapin
|
r48058 | } | ||
Simon Sapin
|
r49002 | /// Fields are documented in the *The data file format* | ||
/// section of `mercurial/helptext/internals/dirstate-v2.txt` | ||||
Raphaël Gomès
|
r50016 | #[derive(BytesCast, Debug)] | ||
Simon Sapin
|
r48058 | #[repr(C)] | ||
Simon Sapin
|
r48128 | pub(super) struct Node { | ||
Simon Sapin
|
r48058 | full_path: PathSlice, | ||
/// In bytes from `self.full_path.start` | ||||
Simon Sapin
|
r48477 | base_name_start: PathSize, | ||
Simon Sapin
|
r48058 | |||
copy_source: OptPathSlice, | ||||
children: ChildNodes, | ||||
Simon Sapin
|
r48272 | pub(super) descendants_with_entry_count: Size, | ||
Simon Sapin
|
r48128 | pub(super) tracked_descendants_count: Size, | ||
Simon Sapin
|
r49045 | flags: U16Be, | ||
Simon Sapin
|
r49033 | size: U32Be, | ||
mtime: PackedTruncatedTimestamp, | ||||
Simon Sapin
|
r48058 | } | ||
Simon Sapin
|
r48951 | bitflags! { | ||
#[repr(C)] | ||||
Simon Sapin
|
r49045 | struct Flags: u16 { | ||
Simon Sapin
|
r48951 | const WDIR_TRACKED = 1 << 0; | ||
const P1_TRACKED = 1 << 1; | ||||
const P2_INFO = 1 << 2; | ||||
r49085 | const MODE_EXEC_PERM = 1 << 3; | |||
const MODE_IS_SYMLINK = 1 << 4; | ||||
const HAS_FALLBACK_EXEC = 1 << 5; | ||||
const FALLBACK_EXEC = 1 << 6; | ||||
const HAS_FALLBACK_SYMLINK = 1 << 7; | ||||
const FALLBACK_SYMLINK = 1 << 8; | ||||
const EXPECTED_STATE_IS_MODIFIED = 1 << 9; | ||||
const HAS_MODE_AND_SIZE = 1 <<10; | ||||
const HAS_MTIME = 1 <<11; | ||||
const MTIME_SECOND_AMBIGUOUS = 1 << 12; | ||||
const DIRECTORY = 1 <<13; | ||||
const ALL_UNKNOWN_RECORDED = 1 <<14; | ||||
const ALL_IGNORED_RECORDED = 1 <<15; | ||||
Simon Sapin
|
r48951 | } | ||
} | ||||
Simon Sapin
|
r48137 | /// Duration since the Unix epoch | ||
Raphaël Gomès
|
r50016 | #[derive(BytesCast, Copy, Clone, Debug)] | ||
Simon Sapin
|
r48137 | #[repr(C)] | ||
Simon Sapin
|
r49033 | struct PackedTruncatedTimestamp { | ||
Simon Sapin
|
r49007 | truncated_seconds: U32Be, | ||
Simon Sapin
|
r48137 | nanoseconds: U32Be, | ||
} | ||||
Simon Sapin
|
r48058 | /// Counted in bytes from the start of the file | ||
/// | ||||
Simon Sapin
|
r48270 | /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB. | ||
type Offset = U32Be; | ||||
Simon Sapin
|
r48058 | |||
/// Counted in number of items | ||||
/// | ||||
Simon Sapin
|
r48477 | /// NOTE: we choose not to support counting more than 4 billion nodes anywhere. | ||
Simon Sapin
|
r48058 | type Size = U32Be; | ||
Simon Sapin
|
r48477 | /// Counted in bytes | ||
Simon Sapin
|
r48058 | /// | ||
Simon Sapin
|
r48477 | /// NOTE: we choose not to support file names/paths longer than 64 KiB. | ||
type PathSize = U16Be; | ||||
Simon Sapin
|
r48058 | |||
/// A contiguous sequence of `len` times `Node`, representing the child nodes | ||||
/// of either some other node or of the repository root. | ||||
/// | ||||
/// Always sorted by ascending `full_path`, to allow binary search. | ||||
/// Since nodes with the same parent nodes also have the same parent path, | ||||
/// only the `base_name`s need to be compared during binary search. | ||||
Raphaël Gomès
|
r50016 | #[derive(BytesCast, Copy, Clone, Debug)] | ||
Simon Sapin
|
r48477 | #[repr(C)] | ||
struct ChildNodes { | ||||
start: Offset, | ||||
len: Size, | ||||
} | ||||
Simon Sapin
|
r48058 | |||
/// A `HgPath` of `len` bytes | ||||
Raphaël Gomès
|
r50016 | #[derive(BytesCast, Copy, Clone, Debug)] | ||
Simon Sapin
|
r48477 | #[repr(C)] | ||
struct PathSlice { | ||||
start: Offset, | ||||
len: PathSize, | ||||
} | ||||
Simon Sapin
|
r48058 | |||
/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes | ||||
Simon Sapin
|
r48477 | type OptPathSlice = PathSlice; | ||
Simon Sapin
|
r48058 | |||
Simon Sapin
|
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    /// Human-readable description of what went wrong
    message: String,
}

impl DirstateV2ParseError {
    /// Builds a parse error from anything convertible to a `String`.
    pub fn new<S: Into<String>>(message: S) -> Self {
        let message = message.into();
        Self { message }
    }
}
Simon Sapin
|
r48125 | |||
impl From<DirstateV2ParseError> for HgError { | ||||
Raphaël Gomès
|
r50268 | fn from(e: DirstateV2ParseError) -> Self { | ||
HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message)) | ||||
Simon Sapin
|
r48125 | } | ||
} | ||||
impl From<DirstateV2ParseError> for crate::DirstateError { | ||||
fn from(error: DirstateV2ParseError) -> Self { | ||||
HgError::from(error).into() | ||||
} | ||||
} | ||||
Simon Sapin
|
r49249 | impl TreeMetadata { | ||
pub fn as_bytes(&self) -> &[u8] { | ||||
BytesCast::as_bytes(self) | ||||
} | ||||
} | ||||
Simon Sapin
|
r48474 | impl<'on_disk> Docket<'on_disk> { | ||
Simon Sapin
|
r49249 | /// Generate the identifier for a new data file | ||
/// | ||||
/// TODO: support the `HGTEST_UUIDFILE` environment variable. | ||||
/// See `mercurial/revlogutils/docket.py` | ||||
pub fn new_uid() -> String { | ||||
const ID_LENGTH: usize = 8; | ||||
let mut id = String::with_capacity(ID_LENGTH); | ||||
let mut rng = rand::thread_rng(); | ||||
for _ in 0..ID_LENGTH { | ||||
// One random hexadecimal digit. | ||||
// `unwrap` never panics because `impl Write for String` | ||||
// never returns an error. | ||||
Martin von Zweigbergk
|
r49403 | write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap(); | ||
Simon Sapin
|
r49249 | } | ||
id | ||||
} | ||||
pub fn serialize( | ||||
parents: DirstateParents, | ||||
tree_metadata: TreeMetadata, | ||||
data_size: u64, | ||||
uuid: &[u8], | ||||
) -> Result<Vec<u8>, std::num::TryFromIntError> { | ||||
let header = DocketHeader { | ||||
marker: *V2_FORMAT_MARKER, | ||||
parent_1: parents.p1.pad_to_256_bits(), | ||||
parent_2: parents.p2.pad_to_256_bits(), | ||||
metadata: tree_metadata, | ||||
data_size: u32::try_from(data_size)?.into(), | ||||
uuid_size: uuid.len().try_into()?, | ||||
}; | ||||
let header = header.as_bytes(); | ||||
let mut docket = Vec::with_capacity(header.len() + uuid.len()); | ||||
docket.extend_from_slice(header); | ||||
docket.extend_from_slice(uuid); | ||||
Ok(docket) | ||||
} | ||||
Simon Sapin
|
r48474 | pub fn parents(&self) -> DirstateParents { | ||
use crate::Node; | ||||
let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES]) | ||||
.unwrap() | ||||
.clone(); | ||||
let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES]) | ||||
.unwrap() | ||||
.clone(); | ||||
DirstateParents { p1, p2 } | ||||
} | ||||
Simon Sapin
|
r48482 | pub fn tree_metadata(&self) -> &[u8] { | ||
self.header.metadata.as_bytes() | ||||
} | ||||
Simon Sapin
|
r48475 | pub fn data_size(&self) -> usize { | ||
// This `unwrap` could only panic on a 16-bit CPU | ||||
self.header.data_size.get().try_into().unwrap() | ||||
} | ||||
Simon Sapin
|
r48474 | pub fn data_filename(&self) -> String { | ||
Simon Sapin
|
r48780 | String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap() | ||
Simon Sapin
|
r48474 | } | ||
} | ||||
pub fn read_docket( | ||||
on_disk: &[u8], | ||||
) -> Result<Docket<'_>, DirstateV2ParseError> { | ||||
Raphaël Gomès
|
r50268 | let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| { | ||
DirstateV2ParseError::new(format!("when reading docket, {}", e)) | ||||
})?; | ||||
Simon Sapin
|
r48474 | let uuid_size = header.uuid_size as usize; | ||
if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size { | ||||
Ok(Docket { header, uuid }) | ||||
Simon Sapin
|
r48165 | } else { | ||
Raphaël Gomès
|
r50268 | Err(DirstateV2ParseError::new( | ||
"invalid format marker or uuid size", | ||||
)) | ||||
Simon Sapin
|
r48165 | } | ||
} | ||||
Simon Sapin
|
r48058 | pub(super) fn read<'on_disk>( | ||
on_disk: &'on_disk [u8], | ||||
Simon Sapin
|
r48482 | metadata: &[u8], | ||
Simon Sapin
|
r48474 | ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> { | ||
Simon Sapin
|
r48058 | if on_disk.is_empty() { | ||
Raphaël Gomès
|
r50232 | let mut map = DirstateMap::empty(on_disk); | ||
map.dirstate_version = DirstateVersion::V2; | ||||
return Ok(map); | ||||
Simon Sapin
|
r48058 | } | ||
Raphaël Gomès
|
r50268 | let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| { | ||
DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e)) | ||||
})?; | ||||
Simon Sapin
|
r48058 | let dirstate_map = DirstateMap { | ||
on_disk, | ||||
Raphaël Gomès
|
r50268 | root: dirstate_map::ChildNodes::OnDisk( | ||
read_nodes(on_disk, meta.root_nodes).map_err(|mut e| { | ||||
e.message = format!("{}, when reading root notes", e.message); | ||||
e | ||||
})?, | ||||
), | ||||
Simon Sapin
|
r48482 | nodes_with_entry_count: meta.nodes_with_entry_count.get(), | ||
nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(), | ||||
ignore_patterns_hash: meta.ignore_patterns_hash, | ||||
unreachable_bytes: meta.unreachable_bytes.get(), | ||||
Raphaël Gomès
|
r50037 | old_data_size: on_disk.len(), | ||
Raphaël Gomès
|
r50232 | dirstate_version: DirstateVersion::V2, | ||
Simon Sapin
|
r48058 | }; | ||
Simon Sapin
|
r48474 | Ok(dirstate_map) | ||
Simon Sapin
|
r48058 | } | ||
impl Node { | ||||
Simon Sapin
|
r48128 | pub(super) fn full_path<'on_disk>( | ||
Simon Sapin
|
r48058 | &self, | ||
on_disk: &'on_disk [u8], | ||||
Simon Sapin
|
r48128 | ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { | ||
read_hg_path(on_disk, self.full_path) | ||||
} | ||||
pub(super) fn base_name_start<'on_disk>( | ||||
&self, | ||||
) -> Result<usize, DirstateV2ParseError> { | ||||
let start = self.base_name_start.get(); | ||||
if start < self.full_path.len.get() { | ||||
let start = usize::try_from(start) | ||||
// u32 -> usize, could only panic on a 16-bit CPU | ||||
.expect("dirstate-v2 base_name_start out of bounds"); | ||||
Ok(start) | ||||
Simon Sapin
|
r48058 | } else { | ||
Raphaël Gomès
|
r50268 | Err(DirstateV2ParseError::new("not enough bytes for base name")) | ||
Simon Sapin
|
r48058 | } | ||
} | ||||
Simon Sapin
|
r48128 | pub(super) fn base_name<'on_disk>( | ||
&self, | ||||
on_disk: &'on_disk [u8], | ||||
) -> Result<&'on_disk HgPath, DirstateV2ParseError> { | ||||
let full_path = self.full_path(on_disk)?; | ||||
let base_name_start = self.base_name_start()?; | ||||
Ok(HgPath::new(&full_path.as_bytes()[base_name_start..])) | ||||
} | ||||
pub(super) fn path<'on_disk>( | ||||
&self, | ||||
on_disk: &'on_disk [u8], | ||||
) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> { | ||||
Ok(WithBasename::from_raw_parts( | ||||
Cow::Borrowed(self.full_path(on_disk)?), | ||||
self.base_name_start()?, | ||||
)) | ||||
} | ||||
pub(super) fn has_copy_source<'on_disk>(&self) -> bool { | ||||
self.copy_source.start.get() != 0 | ||||
} | ||||
Simon Sapin
|
r48058 | pub(super) fn copy_source<'on_disk>( | ||
&self, | ||||
on_disk: &'on_disk [u8], | ||||
Simon Sapin
|
r48128 | ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> { | ||
Ok(if self.has_copy_source() { | ||||
Simon Sapin
|
r48058 | Some(read_hg_path(on_disk, self.copy_source)?) | ||
} else { | ||||
None | ||||
}) | ||||
} | ||||
Simon Sapin
|
r49045 | fn flags(&self) -> Flags { | ||
Flags::from_bits_truncate(self.flags.get()) | ||||
} | ||||
Simon Sapin
|
r48951 | fn has_entry(&self) -> bool { | ||
Simon Sapin
|
r49045 | self.flags().intersects( | ||
Simon Sapin
|
r48951 | Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO, | ||
) | ||||
} | ||||
Simon Sapin
|
r48137 | pub(super) fn node_data( | ||
&self, | ||||
) -> Result<dirstate_map::NodeData, DirstateV2ParseError> { | ||||
Simon Sapin
|
r48951 | if self.has_entry() { | ||
Simon Sapin
|
r49079 | Ok(dirstate_map::NodeData::Entry(self.assume_entry()?)) | ||
Simon Sapin
|
r49007 | } else if let Some(mtime) = self.cached_directory_mtime()? { | ||
Simon Sapin
|
r48951 | Ok(dirstate_map::NodeData::CachedDirectory { mtime }) | ||
} else { | ||||
Ok(dirstate_map::NodeData::None) | ||||
Simon Sapin
|
r48137 | } | ||
Simon Sapin
|
r48128 | } | ||
Simon Sapin
|
r49007 | pub(super) fn cached_directory_mtime( | ||
&self, | ||||
) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> { | ||||
r49083 | // For now we do not have code to handle the absence of | |||
// ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is | ||||
// unset. | ||||
if self.flags().contains(Flags::DIRECTORY) | ||||
&& self.flags().contains(Flags::HAS_MTIME) | ||||
r49067 | && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED) | |||
{ | ||||
Simon Sapin
|
r49332 | Ok(Some(self.mtime()?)) | ||
Simon Sapin
|
r49046 | } else { | ||
Ok(None) | ||||
} | ||||
Simon Sapin
|
r48138 | } | ||
Simon Sapin
|
r49009 | fn synthesize_unix_mode(&self) -> u32 { | ||
Simon Sapin
|
r49045 | let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) { | ||
Simon Sapin
|
r49009 | libc::S_IFLNK | ||
} else { | ||||
libc::S_IFREG | ||||
}; | ||||
Simon Sapin
|
r49045 | let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) { | ||
Simon Sapin
|
r49009 | 0o755 | ||
} else { | ||||
0o644 | ||||
}; | ||||
Dan Villiom Podlaski Christiansen
|
r49417 | (file_type | permisions).into() | ||
Simon Sapin
|
r49009 | } | ||
Simon Sapin
|
r49332 | fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> { | ||
let mut m: TruncatedTimestamp = self.mtime.try_into()?; | ||||
if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) { | ||||
m.second_ambiguous = true; | ||||
} | ||||
Ok(m) | ||||
} | ||||
Simon Sapin
|
r49079 | fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> { | ||
Simon Sapin
|
r48951 | // TODO: convert through raw bits instead? | ||
Raphaël Gomès
|
r49991 | let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED); | ||
Simon Sapin
|
r49045 | let p1_tracked = self.flags().contains(Flags::P1_TRACKED); | ||
let p2_info = self.flags().contains(Flags::P2_INFO); | ||||
Simon Sapin
|
r49066 | let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) | ||
&& !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) | ||||
{ | ||||
Simon Sapin
|
r49033 | Some((self.synthesize_unix_mode(), self.size.into())) | ||
Simon Sapin
|
r48951 | } else { | ||
None | ||||
}; | ||||
r49083 | let mtime = if self.flags().contains(Flags::HAS_MTIME) | |||
&& !self.flags().contains(Flags::DIRECTORY) | ||||
Simon Sapin
|
r49066 | && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) | ||
{ | ||||
Simon Sapin
|
r49332 | Some(self.mtime()?) | ||
Simon Sapin
|
r48951 | } else { | ||
None | ||||
}; | ||||
r49084 | let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC) | |||
{ | ||||
Some(self.flags().contains(Flags::FALLBACK_EXEC)) | ||||
} else { | ||||
None | ||||
}; | ||||
let fallback_symlink = | ||||
if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) { | ||||
Some(self.flags().contains(Flags::FALLBACK_SYMLINK)) | ||||
} else { | ||||
None | ||||
}; | ||||
Raphaël Gomès
|
r49991 | Ok(DirstateEntry::from_v2_data(DirstateV2Data { | ||
wc_tracked, | ||||
Simon Sapin
|
r48951 | p1_tracked, | ||
p2_info, | ||||
mode_size, | ||||
mtime, | ||||
r49084 | fallback_exec, | |||
fallback_symlink, | ||||
Raphaël Gomès
|
r49991 | })) | ||
Simon Sapin
|
r48128 | } | ||
Simon Sapin
|
r48125 | pub(super) fn entry( | ||
&self, | ||||
) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { | ||||
Simon Sapin
|
r48951 | if self.has_entry() { | ||
Simon Sapin
|
r49079 | Ok(Some(self.assume_entry()?)) | ||
Simon Sapin
|
r48951 | } else { | ||
Ok(None) | ||||
} | ||||
Simon Sapin
|
r48128 | } | ||
pub(super) fn children<'on_disk>( | ||||
&self, | ||||
on_disk: &'on_disk [u8], | ||||
) -> Result<&'on_disk [Node], DirstateV2ParseError> { | ||||
Simon Sapin
|
r48477 | read_nodes(on_disk, self.children) | ||
Simon Sapin
|
r48058 | } | ||
pub(super) fn to_in_memory_node<'on_disk>( | ||||
&self, | ||||
on_disk: &'on_disk [u8], | ||||
Simon Sapin
|
r48125 | ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> { | ||
Simon Sapin
|
r48058 | Ok(dirstate_map::Node { | ||
Simon Sapin
|
r48128 | children: dirstate_map::ChildNodes::OnDisk( | ||
self.children(on_disk)?, | ||||
), | ||||
copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed), | ||||
Simon Sapin
|
r48137 | data: self.node_data()?, | ||
Simon Sapin
|
r48272 | descendants_with_entry_count: self | ||
.descendants_with_entry_count | ||||
.get(), | ||||
Simon Sapin
|
r48058 | tracked_descendants_count: self.tracked_descendants_count.get(), | ||
}) | ||||
} | ||||
Simon Sapin
|
r49033 | fn from_dirstate_entry( | ||
entry: &DirstateEntry, | ||||
) -> (Flags, U32Be, PackedTruncatedTimestamp) { | ||||
Raphaël Gomès
|
r49991 | let DirstateV2Data { | ||
wc_tracked, | ||||
r49070 | p1_tracked, | |||
p2_info, | ||||
Raphaël Gomès
|
r49991 | mode_size: mode_size_opt, | ||
mtime: mtime_opt, | ||||
r49070 | fallback_exec, | |||
fallback_symlink, | ||||
Raphaël Gomès
|
r49991 | } = entry.v2_data(); | ||
// TODO: convert through raw flag bits instead? | ||||
Simon Sapin
|
r48951 | let mut flags = Flags::empty(); | ||
Raphaël Gomès
|
r49991 | flags.set(Flags::WDIR_TRACKED, wc_tracked); | ||
Simon Sapin
|
r48951 | flags.set(Flags::P1_TRACKED, p1_tracked); | ||
flags.set(Flags::P2_INFO, p2_info); | ||||
Simon Sapin
|
r49033 | let size = if let Some((m, s)) = mode_size_opt { | ||
Dan Villiom Podlaski Christiansen
|
r49417 | let exec_perm = m & (libc::S_IXUSR as u32) != 0; | ||
let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32; | ||||
Simon Sapin
|
r49009 | flags.set(Flags::MODE_EXEC_PERM, exec_perm); | ||
flags.set(Flags::MODE_IS_SYMLINK, is_symlink); | ||||
Simon Sapin
|
r49033 | flags.insert(Flags::HAS_MODE_AND_SIZE); | ||
s.into() | ||||
Simon Sapin
|
r48951 | } else { | ||
Simon Sapin
|
r49033 | 0.into() | ||
Simon Sapin
|
r48951 | }; | ||
Simon Sapin
|
r49033 | let mtime = if let Some(m) = mtime_opt { | ||
r49083 | flags.insert(Flags::HAS_MTIME); | |||
r49230 | if m.second_ambiguous { | |||
flags.insert(Flags::MTIME_SECOND_AMBIGUOUS); | ||||
}; | ||||
Simon Sapin
|
r49079 | m.into() | ||
Simon Sapin
|
r49033 | } else { | ||
PackedTruncatedTimestamp::null() | ||||
Simon Sapin
|
r49006 | }; | ||
r49070 | if let Some(f_exec) = fallback_exec { | |||
flags.insert(Flags::HAS_FALLBACK_EXEC); | ||||
if f_exec { | ||||
flags.insert(Flags::FALLBACK_EXEC); | ||||
} | ||||
} | ||||
if let Some(f_symlink) = fallback_symlink { | ||||
flags.insert(Flags::HAS_FALLBACK_SYMLINK); | ||||
if f_symlink { | ||||
flags.insert(Flags::FALLBACK_SYMLINK); | ||||
} | ||||
} | ||||
Simon Sapin
|
r49033 | (flags, size, mtime) | ||
Simon Sapin
|
r48137 | } | ||
} | ||||
Simon Sapin
|
r48125 | fn read_hg_path( | ||
on_disk: &[u8], | ||||
Simon Sapin
|
r48477 | slice: PathSlice, | ||
Simon Sapin
|
r48128 | ) -> Result<&HgPath, DirstateV2ParseError> { | ||
Simon Sapin
|
r48477 | read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new) | ||
Simon Sapin
|
r48058 | } | ||
Simon Sapin
|
r48477 | fn read_nodes( | ||
Simon Sapin
|
r48125 | on_disk: &[u8], | ||
Simon Sapin
|
r48477 | slice: ChildNodes, | ||
) -> Result<&[Node], DirstateV2ParseError> { | ||||
read_slice(on_disk, slice.start, slice.len.get()) | ||||
} | ||||
fn read_slice<T, Len>( | ||||
on_disk: &[u8], | ||||
start: Offset, | ||||
len: Len, | ||||
Simon Sapin
|
r48125 | ) -> Result<&[T], DirstateV2ParseError> | ||
Simon Sapin
|
r48058 | where | ||
T: BytesCast, | ||||
Simon Sapin
|
r48477 | Len: TryInto<usize>, | ||
Simon Sapin
|
r48058 | { | ||
// Either `usize::MAX` would result in "out of bounds" error since a single | ||||
// `&[u8]` cannot occupy the entire addess space. | ||||
Simon Sapin
|
r48477 | let start = start.get().try_into().unwrap_or(std::usize::MAX); | ||
let len = len.try_into().unwrap_or(std::usize::MAX); | ||||
Raphaël Gomès
|
r50268 | let bytes = match on_disk.get(start..) { | ||
Some(bytes) => bytes, | ||||
None => { | ||||
return Err(DirstateV2ParseError::new( | ||||
"not enough bytes from disk", | ||||
)) | ||||
} | ||||
}; | ||||
T::slice_from_bytes(bytes, len) | ||||
.map_err(|e| { | ||||
DirstateV2ParseError::new(format!("when reading a slice, {}", e)) | ||||
}) | ||||
Simon Sapin
|
r48058 | .map(|(slice, _rest)| slice) | ||
} | ||||
Simon Sapin
|
r48165 | pub(crate) fn for_each_tracked_path<'on_disk>( | ||
on_disk: &'on_disk [u8], | ||||
Simon Sapin
|
r48482 | metadata: &[u8], | ||
Simon Sapin
|
r48165 | mut f: impl FnMut(&'on_disk HgPath), | ||
) -> Result<(), DirstateV2ParseError> { | ||||
Raphaël Gomès
|
r50268 | let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| { | ||
DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e)) | ||||
})?; | ||||
Simon Sapin
|
r48165 | fn recur<'on_disk>( | ||
on_disk: &'on_disk [u8], | ||||
Simon Sapin
|
r48477 | nodes: ChildNodes, | ||
Simon Sapin
|
r48165 | f: &mut impl FnMut(&'on_disk HgPath), | ||
) -> Result<(), DirstateV2ParseError> { | ||||
Simon Sapin
|
r48477 | for node in read_nodes(on_disk, nodes)? { | ||
Simon Sapin
|
r48951 | if let Some(entry) = node.entry()? { | ||
Raphaël Gomès
|
r50027 | if entry.tracked() { | ||
Simon Sapin
|
r48165 | f(node.full_path(on_disk)?) | ||
} | ||||
} | ||||
recur(on_disk, node.children, f)? | ||||
} | ||||
Ok(()) | ||||
} | ||||
Simon Sapin
|
r48482 | recur(on_disk, meta.root_nodes, &mut f) | ||
Simon Sapin
|
r48165 | } | ||
Simon Sapin
|
r48482 | /// Returns new data and metadata, together with whether that data should be | ||
/// appended to the existing data file whose content is at | ||||
/// `dirstate_map.on_disk` (true), instead of written to a new data file | ||||
Raphaël Gomès
|
r50037 | /// (false), and the previous size of data on disk. | ||
Simon Sapin
|
r48058 | pub(super) fn write( | ||
Simon Sapin
|
r49244 | dirstate_map: &DirstateMap, | ||
Simon Sapin
|
r48478 | can_append: bool, | ||
Raphaël Gomès
|
r50037 | ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> { | ||
Simon Sapin
|
r48478 | let append = can_append && dirstate_map.write_should_append(); | ||
Simon Sapin
|
r48058 | |||
// This ignores the space for paths, and for nodes without an entry. | ||||
// TODO: better estimate? Skip the `Vec` and write to a file directly? | ||||
Simon Sapin
|
r48482 | let size_guess = std::mem::size_of::<Node>() | ||
* dirstate_map.nodes_with_entry_count as usize; | ||||
Simon Sapin
|
r48058 | |||
Simon Sapin
|
r48478 | let mut writer = Writer { | ||
dirstate_map, | ||||
append, | ||||
out: Vec::with_capacity(size_guess), | ||||
}; | ||||
let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?; | ||||
Simon Sapin
|
r48058 | |||
Raphaël Gomès
|
r50050 | let unreachable_bytes = if append { | ||
dirstate_map.unreachable_bytes | ||||
} else { | ||||
0 | ||||
}; | ||||
Simon Sapin
|
r48482 | let meta = TreeMetadata { | ||
Simon Sapin
|
r48476 | root_nodes, | ||
Simon Sapin
|
r48058 | nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(), | ||
nodes_with_copy_source_count: dirstate_map | ||||
.nodes_with_copy_source_count | ||||
.into(), | ||||
Raphaël Gomès
|
r50050 | unreachable_bytes: unreachable_bytes.into(), | ||
Simon Sapin
|
r48484 | unused: [0; 4], | ||
Simon Sapin
|
r48202 | ignore_patterns_hash: dirstate_map.ignore_patterns_hash, | ||
Simon Sapin
|
r48058 | }; | ||
Raphaël Gomès
|
r50037 | Ok((writer.out, meta, append, dirstate_map.old_data_size)) | ||
Simon Sapin
|
r48478 | } | ||
struct Writer<'dmap, 'on_disk> { | ||||
dirstate_map: &'dmap DirstateMap<'on_disk>, | ||||
append: bool, | ||||
out: Vec<u8>, | ||||
Simon Sapin
|
r48058 | } | ||
Simon Sapin
|
r48478 | impl Writer<'_, '_> { | ||
fn write_nodes( | ||||
&mut self, | ||||
nodes: dirstate_map::ChildNodesRef, | ||||
) -> Result<ChildNodes, DirstateError> { | ||||
Simon Sapin
|
r48479 | // Reuse already-written nodes if possible | ||
if self.append { | ||||
if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes { | ||||
Simon Sapin
|
r48480 | let start = self.on_disk_offset_of(nodes_slice).expect( | ||
"dirstate-v2 OnDisk nodes not found within on_disk", | ||||
); | ||||
Simon Sapin
|
r48479 | let len = child_nodes_len_from_usize(nodes_slice.len()); | ||
return Ok(ChildNodes { start, len }); | ||||
} | ||||
} | ||||
// `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has | ||||
// undefined iteration order. Sort to enable binary search in the | ||||
// written file. | ||||
Simon Sapin
|
r48478 | let nodes = nodes.sorted(); | ||
let nodes_len = nodes.len(); | ||||
Simon Sapin
|
r48058 | |||
Simon Sapin
|
r48478 | // First accumulate serialized nodes in a `Vec` | ||
let mut on_disk_nodes = Vec::with_capacity(nodes_len); | ||||
for node in nodes { | ||||
let children = | ||||
self.write_nodes(node.children(self.dirstate_map.on_disk)?)?; | ||||
let full_path = node.full_path(self.dirstate_map.on_disk)?; | ||||
let full_path = self.write_path(full_path.as_bytes()); | ||||
let copy_source = if let Some(source) = | ||||
node.copy_source(self.dirstate_map.on_disk)? | ||||
{ | ||||
self.write_path(source.as_bytes()) | ||||
Simon Sapin
|
r48127 | } else { | ||
Simon Sapin
|
r48477 | PathSlice { | ||
Simon Sapin
|
r48127 | start: 0.into(), | ||
len: 0.into(), | ||||
} | ||||
}; | ||||
Simon Sapin
|
r48478 | on_disk_nodes.push(match node { | ||
NodeRef::InMemory(path, node) => { | ||||
Simon Sapin
|
r49033 | let (flags, size, mtime) = match &node.data { | ||
Simon Sapin
|
r48951 | dirstate_map::NodeData::Entry(entry) => { | ||
Simon Sapin
|
r49033 | Node::from_dirstate_entry(entry) | ||
Simon Sapin
|
r48951 | } | ||
Simon Sapin
|
r49332 | dirstate_map::NodeData::CachedDirectory { mtime } => { | ||
r49067 | // we currently never set a mtime if unknown file | |||
// are present. | ||||
// So if we have a mtime for a directory, we know | ||||
// they are no unknown | ||||
// files and we | ||||
// blindly set ALL_UNKNOWN_RECORDED. | ||||
// | ||||
// We never set ALL_IGNORED_RECORDED since we | ||||
// don't track that case | ||||
// currently. | ||||
Simon Sapin
|
r49332 | let mut flags = Flags::DIRECTORY | ||
r49083 | | Flags::HAS_MTIME | |||
Simon Sapin
|
r49332 | | Flags::ALL_UNKNOWN_RECORDED; | ||
if mtime.second_ambiguous { | ||||
flags.insert(Flags::MTIME_SECOND_AMBIGUOUS) | ||||
} | ||||
(flags, 0.into(), (*mtime).into()) | ||||
} | ||||
Simon Sapin
|
r48478 | dirstate_map::NodeData::None => ( | ||
r49083 | Flags::DIRECTORY, | |||
Simon Sapin
|
r49033 | 0.into(), | ||
PackedTruncatedTimestamp::null(), | ||||
Simon Sapin
|
r48478 | ), | ||
}; | ||||
Node { | ||||
children, | ||||
copy_source, | ||||
full_path, | ||||
base_name_start: u16::try_from(path.base_name_start()) | ||||
// Could only panic for paths over 64 KiB | ||||
.expect("dirstate-v2 path length overflow") | ||||
.into(), | ||||
descendants_with_entry_count: node | ||||
.descendants_with_entry_count | ||||
.into(), | ||||
tracked_descendants_count: node | ||||
.tracked_descendants_count | ||||
.into(), | ||||
Simon Sapin
|
r49045 | flags: flags.bits().into(), | ||
Simon Sapin
|
r49033 | size, | ||
mtime, | ||||
Simon Sapin
|
r48124 | } | ||
Simon Sapin
|
r48478 | } | ||
NodeRef::OnDisk(node) => Node { | ||||
Simon Sapin
|
r48137 | children, | ||
copy_source, | ||||
full_path, | ||||
Simon Sapin
|
r48478 | ..*node | ||
}, | ||||
}) | ||||
} | ||||
// … so we can write them contiguously, after writing everything else | ||||
// they refer to. | ||||
let start = self.current_offset(); | ||||
Simon Sapin
|
r48479 | let len = child_nodes_len_from_usize(nodes_len); | ||
Simon Sapin
|
r48478 | self.out.extend(on_disk_nodes.as_bytes()); | ||
Ok(ChildNodes { start, len }) | ||||
Simon Sapin
|
r48058 | } | ||
Simon Sapin
|
r48480 | /// If the given slice of items is within `on_disk`, returns its offset | ||
/// from the start of `on_disk`. | ||||
fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset> | ||||
Simon Sapin
|
r48479 | where | ||
T: BytesCast, | ||||
{ | ||||
fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> { | ||||
let start = slice.as_ptr() as usize; | ||||
let end = start + slice.len(); | ||||
start..=end | ||||
} | ||||
let slice_addresses = address_range(slice.as_bytes()); | ||||
let on_disk_addresses = address_range(self.dirstate_map.on_disk); | ||||
Simon Sapin
|
r48480 | if on_disk_addresses.contains(slice_addresses.start()) | ||
&& on_disk_addresses.contains(slice_addresses.end()) | ||||
{ | ||||
let offset = slice_addresses.start() - on_disk_addresses.start(); | ||||
Some(offset_from_usize(offset)) | ||||
} else { | ||||
None | ||||
} | ||||
Simon Sapin
|
r48479 | } | ||
Simon Sapin
|
r48478 | fn current_offset(&mut self) -> Offset { | ||
let mut offset = self.out.len(); | ||||
if self.append { | ||||
offset += self.dirstate_map.on_disk.len() | ||||
} | ||||
Simon Sapin
|
r48479 | offset_from_usize(offset) | ||
Simon Sapin
|
r48478 | } | ||
Simon Sapin
|
r48477 | |||
Simon Sapin
|
r48478 | fn write_path(&mut self, slice: &[u8]) -> PathSlice { | ||
Simon Sapin
|
r48480 | let len = path_len_from_usize(slice.len()); | ||
// Reuse an already-written path if possible | ||||
if self.append { | ||||
if let Some(start) = self.on_disk_offset_of(slice) { | ||||
return PathSlice { start, len }; | ||||
} | ||||
} | ||||
Simon Sapin
|
r48478 | let start = self.current_offset(); | ||
self.out.extend(slice.as_bytes()); | ||||
PathSlice { start, len } | ||||
} | ||||
Simon Sapin
|
r48058 | } | ||
Simon Sapin
|
r48479 | |||
fn offset_from_usize(x: usize) -> Offset { | ||||
u32::try_from(x) | ||||
// Could only panic for a dirstate file larger than 4 GiB | ||||
.expect("dirstate-v2 offset overflow") | ||||
.into() | ||||
} | ||||
fn child_nodes_len_from_usize(x: usize) -> Size { | ||||
u32::try_from(x) | ||||
// Could only panic with over 4 billion nodes | ||||
.expect("dirstate-v2 slice length overflow") | ||||
.into() | ||||
} | ||||
fn path_len_from_usize(x: usize) -> PathSize { | ||||
u16::try_from(x) | ||||
// Could only panic for paths over 64 KiB | ||||
.expect("dirstate-v2 path length overflow") | ||||
.into() | ||||
} | ||||
Simon Sapin
|
r49033 | |||
impl From<TruncatedTimestamp> for PackedTruncatedTimestamp { | ||||
fn from(timestamp: TruncatedTimestamp) -> Self { | ||||
Self { | ||||
truncated_seconds: timestamp.truncated_seconds().into(), | ||||
nanoseconds: timestamp.nanoseconds().into(), | ||||
} | ||||
} | ||||
} | ||||
impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp { | ||||
type Error = DirstateV2ParseError; | ||||
fn try_from( | ||||
timestamp: PackedTruncatedTimestamp, | ||||
) -> Result<Self, Self::Error> { | ||||
Self::from_already_truncated( | ||||
timestamp.truncated_seconds.get(), | ||||
timestamp.nanoseconds.get(), | ||||
r49227 | false, | |||
Simon Sapin
|
r49033 | ) | ||
} | ||||
} | ||||
impl PackedTruncatedTimestamp { | ||||
fn null() -> Self { | ||||
Self { | ||||
truncated_seconds: 0.into(), | ||||
nanoseconds: 0.into(), | ||||
} | ||||
} | ||||
} | ||||