# HG changeset patch # User Simon Sapin # Date 2021-04-12 12:21:47 # Node ID d6c94ca40863d7daee2d04e6ededed71219a7a11 # Parent 5d62243c773215a2c44721e209d1921af8d6edb2 dirstate-tree: Serialize to disk The existing `pack_dirstate` function relies on implementation details of `DirstateMap`, so extract some parts of it as separate functions for use in the tree-based `DirstateMap`. The `bytes-cast` crate is updated to a version that has an `as_bytes` method, not just `from_bytes`: https://docs.rs/bytes-cast/0.2.0/bytes_cast/trait.BytesCast.html#method.as_bytes Drive-by refactor `clear_ambiguous_times` which does part of the same thing. Differential Revision: https://phab.mercurial-scm.org/D10486 diff --git a/rust/Cargo.lock b/rust/Cargo.lock --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -64,9 +64,9 @@ checksum = "08c48aae112d48ed9f069b33538e [[package]] name = "bytes-cast" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3196ba300c7bc9282a4331e878496cb3e9603a898a8f1446601317163e16ca52" +checksum = "0d434f9a4ecbe987e7ccfda7274b6f82ea52c9b63742565a65cb5e8ba0f2c452" dependencies = [ "bytes-cast-derive", ] diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml --- a/rust/hg-core/Cargo.toml +++ b/rust/hg-core/Cargo.toml @@ -9,7 +9,7 @@ edition = "2018" name = "hg" [dependencies] -bytes-cast = "0.1" +bytes-cast = "0.2" byteorder = "1.3.4" derive_more = "0.99" home = "0.5" diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs --- a/rust/hg-core/src/dirstate/parsers.rs +++ b/rust/hg-core/src/dirstate/parsers.rs @@ -4,7 +4,7 @@ // GNU General Public License version 2 or any later version. 
use crate::errors::HgError; -use crate::utils::hg_path::HgPath; +use crate::utils::hg_path::{HgPath, HgPathBuf}; use crate::{ dirstate::{CopyMap, EntryState, RawEntry, StateMap}, DirstateEntry, DirstateParents, @@ -82,9 +82,71 @@ pub fn parse_dirstate_entries<'a>( Ok(parents) } +fn packed_filename_and_copy_source_size( + filename: &HgPathBuf, + copy_source: Option<&HgPathBuf>, +) -> usize { + filename.len() + + if let Some(source) = copy_source { + b"\0".len() + source.len() + } else { + 0 + } +} + +pub fn packed_entry_size( + filename: &HgPathBuf, + copy_source: Option<&HgPathBuf>, +) -> usize { + MIN_ENTRY_SIZE + + packed_filename_and_copy_source_size(filename, copy_source) +} + +pub fn pack_entry( + filename: &HgPathBuf, + entry: &DirstateEntry, + copy_source: Option<&HgPathBuf>, + packed: &mut Vec, +) { + let length = packed_filename_and_copy_source_size(filename, copy_source); + + // Unwrapping because `impl std::io::Write for Vec` never errors + packed.write_u8(entry.state.into()).unwrap(); + packed.write_i32::(entry.mode).unwrap(); + packed.write_i32::(entry.size).unwrap(); + packed.write_i32::(entry.mtime).unwrap(); + packed.write_i32::(length as i32).unwrap(); + packed.extend(filename.as_bytes()); + if let Some(source) = copy_source { + packed.push(b'\0'); + packed.extend(source.as_bytes()); + } +} + /// Seconds since the Unix epoch pub struct Timestamp(pub u64); +pub fn clear_ambiguous_mtime( + entry: &mut DirstateEntry, + mtime_now: i32, +) -> bool { + let ambiguous = + entry.state == EntryState::Normal && entry.mtime == mtime_now; + if ambiguous { + // The file was last modified "simultaneously" with the current + // write to dirstate (i.e. within the same second for file- + // systems with a granularity of 1 sec). This commonly happens + // for at least a couple of files on 'update'. + // The user could change the file without changing its size + // within the same second. 
Invalidate the file's mtime in + // dirstate, forcing future 'status' calls to compare the + // contents of the file if the size is the same. This prevents + // mistakenly treating such files as clean. + entry.mtime = -1; + } + ambiguous +} + pub fn pack_dirstate( state_map: &mut StateMap, copy_map: &CopyMap, @@ -97,11 +159,7 @@ pub fn pack_dirstate( let expected_size: usize = state_map .iter() .map(|(filename, _)| { - let mut length = MIN_ENTRY_SIZE + filename.len(); - if let Some(copy) = copy_map.get(filename) { - length += copy.len() + 1; - } - length + packed_entry_size(filename, copy_map.get(filename)) }) .sum(); let expected_size = expected_size + PARENT_SIZE * 2; @@ -112,39 +170,8 @@ pub fn pack_dirstate( packed.extend(parents.p2.as_bytes()); for (filename, entry) in state_map.iter_mut() { - let new_filename = filename.to_owned(); - let mut new_mtime: i32 = entry.mtime; - if entry.state == EntryState::Normal && entry.mtime == now { - // The file was last modified "simultaneously" with the current - // write to dirstate (i.e. within the same second for file- - // systems with a granularity of 1 sec). This commonly happens - // for at least a couple of files on 'update'. - // The user could change the file without changing its size - // within the same second. Invalidate the file's mtime in - // dirstate, forcing future 'status' calls to compare the - // contents of the file if the size is the same. This prevents - // mistakenly treating such files as clean. 
- new_mtime = -1; - *entry = DirstateEntry { - mtime: new_mtime, - ..*entry - }; - } - let mut new_filename = new_filename.into_vec(); - if let Some(copy) = copy_map.get(filename) { - new_filename.push(b'\0'); - new_filename.extend(copy.bytes()); - } - - // Unwrapping because `impl std::io::Write for Vec` never errors - packed.write_u8(entry.state.into()).unwrap(); - packed.write_i32::(entry.mode).unwrap(); - packed.write_i32::(entry.size).unwrap(); - packed.write_i32::(new_mtime).unwrap(); - packed - .write_i32::(new_filename.len() as i32) - .unwrap(); - packed.extend(new_filename) + clear_ambiguous_mtime(entry, now); + pack_entry(filename, entry, copy_map.get(filename), &mut packed) } if packed.len() != expected_size { diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs @@ -1,11 +1,14 @@ -use std::collections::BTreeMap; +use bytes_cast::BytesCast; use std::path::PathBuf; +use std::{collections::BTreeMap, convert::TryInto}; use super::path_with_basename::WithBasename; +use crate::dirstate::parsers::clear_ambiguous_mtime; +use crate::dirstate::parsers::pack_entry; +use crate::dirstate::parsers::packed_entry_size; use crate::dirstate::parsers::parse_dirstate_entries; use crate::dirstate::parsers::parse_dirstate_parents; use crate::dirstate::parsers::Timestamp; - use crate::matchers::Matcher; use crate::revlog::node::NULL_NODE; use crate::utils::hg_path::{HgPath, HgPathBuf}; @@ -327,11 +330,38 @@ impl super::dispatch::DirstateMapMethods fn pack( &mut self, - _parents: DirstateParents, - _now: Timestamp, + parents: DirstateParents, + now: Timestamp, ) -> Result, DirstateError> { - let _ = self.iter_node_data_mut(); - todo!() + // Optizimation (to be measured?): pre-compute size to avoid `Vec` + // reallocations + let mut size = parents.as_bytes().len(); + for (path, node) in self.iter_nodes() { + if 
node.entry.is_some() { + size += packed_entry_size( + path.full_path(), + node.copy_source.as_ref(), + ) + } + } + + let mut packed = Vec::with_capacity(size); + packed.extend(parents.as_bytes()); + + let now: i32 = now.0.try_into().expect("time overflow"); + for (path, opt_entry, copy_source) in self.iter_node_data_mut() { + if let Some(entry) = opt_entry { + clear_ambiguous_mtime(entry, now); + pack_entry( + path.full_path(), + entry, + copy_source.as_ref(), + &mut packed, + ); + } + } + self.dirty_parents = false; + Ok(packed) } fn build_file_fold_map(&mut self) -> &FastHashMap {