# HG changeset patch # User Raphaël Gomès # Date 2022-04-25 09:09:33 # Node ID 10b9f11daf15f664a1688727b4b2cfa240501ccd # Parent f45e1618cbf68aca4e72b6fed8dd2acef6f39a10 # Parent 802e2c25dab8eb6d234cbaca66e8381a22e1a003 branching: merge stable into default This also added the small fix needed in Rust tests for the new `DirstateMap::pack_v2` API change in stable. diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs @@ -50,6 +50,10 @@ pub struct DirstateMap<'on_disk> { /// How many bytes of `on_disk` are not used anymore pub(super) unreachable_bytes: u32, + + /// Size of the data used to first load this `DirstateMap`. Used in case + /// we need to write some new metadata, but no new data on disk. + pub(super) old_data_size: usize, } /// Using a plain `HgPathBuf` of the full path from the repository root as a @@ -436,6 +440,7 @@ impl<'on_disk> DirstateMap<'on_disk> { nodes_with_copy_source_count: 0, ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN], unreachable_bytes: 0, + old_data_size: 0, } } @@ -1232,12 +1237,13 @@ impl OwningDirstateMap { /// Returns new data and metadata together with whether that data should be /// appended to the existing data file whose content is at /// `map.on_disk` (true), instead of written to a new data file - /// (false). + /// (false), and the previous size of data on disk. 
#[timed] pub fn pack_v2( &self, can_append: bool, - ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> { + ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError> + { let map = self.get_map(); on_disk::write(map, can_append) } @@ -1795,7 +1801,8 @@ mod tests { None, )?; - let (packed, metadata, _should_append) = map.pack_v2(false)?; + let (packed, metadata, _should_append, _old_data_size) = + map.pack_v2(false)?; let packed_len = packed.len(); assert!(packed_len > 0); diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -290,6 +290,7 @@ pub(super) fn read<'on_disk>( nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(), ignore_patterns_hash: meta.ignore_patterns_hash, unreachable_bytes: meta.unreachable_bytes.get(), + old_data_size: on_disk.len(), }; Ok(dirstate_map) } @@ -601,11 +602,11 @@ pub(crate) fn for_each_tracked_path<'on_ /// Returns new data and metadata, together with whether that data should be /// appended to the existing data file whose content is at /// `dirstate_map.on_disk` (true), instead of written to a new data file -/// (false). +/// (false), and the previous size of data on disk. pub(super) fn write( dirstate_map: &DirstateMap, can_append: bool, -) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> { +) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> { let append = can_append && dirstate_map.write_should_append(); // This ignores the space for paths, and for nodes without an entry. 
@@ -631,7 +632,7 @@ pub(super) fn write( unused: [0; 4], ignore_patterns_hash: dirstate_map.ignore_patterns_hash, }; - Ok((writer.out, meta, append)) + Ok((writer.out, meta, append, dirstate_map.old_data_size)) } struct Writer<'dmap, 'on_disk> { diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs --- a/rust/hg-core/src/repo.rs +++ b/rust/hg-core/src/repo.rs @@ -423,22 +423,25 @@ impl Repo { // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if // it’s unset let parents = self.dirstate_parents()?; - let packed_dirstate = if self.has_dirstate_v2() { + let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() { let uuid = self.dirstate_data_file_uuid.get_or_init(self)?; let mut uuid = uuid.as_ref(); let can_append = uuid.is_some(); - let (data, tree_metadata, append) = map.pack_v2(can_append)?; + let (data, tree_metadata, append, old_data_size) = + map.pack_v2(can_append)?; if !append { uuid = None } - let uuid = if let Some(uuid) = uuid { - std::str::from_utf8(uuid) + let (uuid, old_uuid) = if let Some(uuid) = uuid { + let as_str = std::str::from_utf8(uuid) .map_err(|_| { HgError::corrupted("non-UTF-8 dirstate data file ID") })? - .to_owned() + .to_owned(); + let old_uuid_to_remove = Some(as_str.to_owned()); + (as_str, old_uuid_to_remove) } else { - DirstateDocket::new_uid() + (DirstateDocket::new_uid(), None) }; let data_filename = format!("dirstate.{}", uuid); let data_filename = self.hg_vfs().join(data_filename); @@ -453,13 +456,23 @@ impl Repo { // returns `ErrorKind::AlreadyExists`? Collision chance of two // random IDs is one in 2**32 let mut file = options.open(&data_filename)?; - file.write_all(&data)?; - file.flush()?; - // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+ - file.seek(SeekFrom::Current(0)) + if data.is_empty() { + // If we're not appending anything, the data size is the + // same as in the previous docket. 
It is *not* the file + // length, since it could have garbage at the end. + // We don't have to worry about it when we do have data + // to append since we rewrite the root node in this case. + Ok(old_data_size as u64) + } else { + file.write_all(&data)?; + file.flush()?; + // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+ + file.seek(SeekFrom::Current(0)) + } })() .when_writing_file(&data_filename)?; - DirstateDocket::serialize( + + let packed_dirstate = DirstateDocket::serialize( parents, tree_metadata, data_size, @@ -467,11 +480,20 @@ impl Repo { ) .map_err(|_: std::num::TryFromIntError| { HgError::corrupted("overflow in dirstate docket serialization") - })? + })?; + + (packed_dirstate, old_uuid) } else { - map.pack_v1(parents)? + (map.pack_v1(parents)?, None) }; - self.hg_vfs().atomic_write("dirstate", &packed_dirstate)?; + + let vfs = self.hg_vfs(); + vfs.atomic_write("dirstate", &packed_dirstate)?; + if let Some(uuid) = old_uuid_to_remove { + // Remove the old data file after the new docket pointing to the + // new data file was written. + vfs.remove_file(format!("dirstate.{}", uuid))?; + } Ok(()) } } diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -252,7 +252,7 @@ py_class!(pub class DirstateMap |py| { let inner = self.inner(py).borrow(); let result = inner.pack_v2(can_append); match result { - Ok((packed, tree_metadata, append)) => { + Ok((packed, tree_metadata, append, _old_data_size)) => { let packed = PyBytes::new(py, &packed); let tree_metadata = PyBytes::new(py, tree_metadata.as_bytes()); let tuple = (packed, tree_metadata, append);