diff --git a/mercurial/configitems.py b/mercurial/configitems.py --- a/mercurial/configitems.py +++ b/mercurial/configitems.py @@ -640,6 +640,15 @@ coreconfigitem( b'deprec-warn', default=False, ) +# possible values: +# - auto (the default) +# - force-append +# - force-new +coreconfigitem( + b'devel', + b'dirstate.v2.data_update_mode', + default="auto", +) coreconfigitem( b'devel', b'disableloaddefaultcerts', diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py --- a/mercurial/dirstatemap.py +++ b/mercurial/dirstatemap.py @@ -33,6 +33,7 @@ rangemask = 0x7FFFFFFF WRITE_MODE_AUTO = 0 WRITE_MODE_FORCE_NEW = 1 +WRITE_MODE_FORCE_APPEND = 2 class _dirstatemapcommon: @@ -57,6 +58,16 @@ class _dirstatemapcommon: self._parents = None self._dirtyparents = False self._docket = None + write_mode = ui.config(b"devel", b"dirstate.v2.data_update_mode") + if write_mode == b"auto": + self._write_mode = WRITE_MODE_AUTO + elif write_mode == b"force-append": + self._write_mode = WRITE_MODE_FORCE_APPEND + elif write_mode == b"force-new": + self._write_mode = WRITE_MODE_FORCE_NEW + else: + # unknown value, fallback to default + self._write_mode = WRITE_MODE_AUTO # for consistent view between _pl() and _read() invocations self._pendingmode = None @@ -612,7 +623,7 @@ if rustmod is not None: return # We can only append to an existing data file if there is one - write_mode = WRITE_MODE_AUTO + write_mode = self._write_mode if self.docket.uuid is None: write_mode = WRITE_MODE_FORCE_NEW packed, meta, append = self._map.write_v2(write_mode) diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs @@ -42,6 +42,7 @@ pub enum DirstateVersion { pub enum DirstateMapWriteMode { Auto, ForceNewDataFile, + ForceAppend, } #[derive(Debug)] @@ -69,6 +70,9 @@ pub struct DirstateMap<'on_disk> { pub(super) old_data_size: usize, pub(super) dirstate_version: DirstateVersion, + + /// Controlled by config option `devel.dirstate.v2.data_update_mode` + pub(super) write_mode: DirstateMapWriteMode, } /// Using a plain `HgPathBuf` of the full path from the repository root as a @@ -457,6 +461,7 @@ impl<'on_disk> DirstateMap<'on_disk> { unreachable_bytes: 0, old_data_size: 0, dirstate_version: DirstateVersion::V1, + write_mode: DirstateMapWriteMode::Auto, } } @@ -525,8 +530,15 @@ impl<'on_disk> DirstateMap<'on_disk> { /// append to the existing data file that contains `self.on_disk` (true), /// or create a new data file from scratch (false). pub(super) fn write_should_append(&self) -> bool { - let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32; - ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO + match self.write_mode { + DirstateMapWriteMode::ForceAppend => true, + DirstateMapWriteMode::ForceNewDataFile => false, + DirstateMapWriteMode::Auto => { + let ratio = + self.unreachable_bytes as f32 / self.on_disk.len() as f32; + ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO + } + } } fn get_node<'tree>( @@ -923,6 +935,10 @@ impl<'on_disk> DirstateMap<'on_disk> { *unreachable_bytes += path.len() as u32 } } + + pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) { + self.write_mode = write_mode; + } } /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s. diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs @@ -313,6 +313,7 @@ pub(super) fn read<'on_disk>( unreachable_bytes: meta.unreachable_bytes.get(), old_data_size: on_disk.len(), dirstate_version: DirstateVersion::V2, + write_mode: DirstateMapWriteMode::Auto, }; Ok(dirstate_map) } @@ -641,6 +642,7 @@ pub(super) fn write( let append = match write_mode { DirstateMapWriteMode::Auto => dirstate_map.write_should_append(), DirstateMapWriteMode::ForceNewDataFile => false, + DirstateMapWriteMode::ForceAppend => true, }; if append { log::trace!("appending to the dirstate data file"); diff --git a/rust/hg-core/src/repo.rs b/rust/hg-core/src/repo.rs --- a/rust/hg-core/src/repo.rs +++ b/rust/hg-core/src/repo.rs @@ -321,22 +321,39 @@ impl Repo { .set(Some(docket.uuid.to_owned())); let data_size = docket.data_size(); let metadata = docket.tree_metadata(); - if crate::vfs::is_on_nfs_mount(docket.data_filename()) { - // Don't mmap on NFS to prevent `SIGBUS` error on deletion - OwningDirstateMap::new_v2( - self.hg_vfs().read(docket.data_filename())?, - data_size, - metadata, - ) - } else if let Some(data_mmap) = self - .hg_vfs() - .mmap_open(docket.data_filename()) - .io_not_found_as_none()? - { - OwningDirstateMap::new_v2(data_mmap, data_size, metadata) - } else { - OwningDirstateMap::new_v2(Vec::new(), data_size, metadata) - } + let mut map = + if crate::vfs::is_on_nfs_mount(docket.data_filename()) { + // Don't mmap on NFS to prevent `SIGBUS` error on deletion + OwningDirstateMap::new_v2( + self.hg_vfs().read(docket.data_filename())?, + data_size, + metadata, + ) + } else if let Some(data_mmap) = self + .hg_vfs() + .mmap_open(docket.data_filename()) + .io_not_found_as_none()? + { + OwningDirstateMap::new_v2(data_mmap, data_size, metadata) + } else { + OwningDirstateMap::new_v2(Vec::new(), data_size, metadata) + }?; + + let write_mode_config = self + .config() + .get_str(b"devel", b"dirstate.v2.data_update_mode") + .unwrap_or(Some("auto")) + .unwrap_or("auto"); // don't bother for devel options + let write_mode = match write_mode_config { + "auto" => DirstateMapWriteMode::Auto, + "force-new" => DirstateMapWriteMode::ForceNewDataFile, + "force-append" => DirstateMapWriteMode::ForceAppend, + _ => DirstateMapWriteMode::Auto, + }; + + map.with_dmap_mut(|m| m.set_write_mode(write_mode)); + + Ok(map) } else { let (map, parents) = OwningDirstateMap::new_v1(dirstate_file_contents)?; diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -254,6 +254,7 @@ py_class!(pub class DirstateMap |py| { let rust_write_mode = match write_mode { 0 => DirstateMapWriteMode::Auto, 1 => DirstateMapWriteMode::ForceNewDataFile, + 2 => DirstateMapWriteMode::ForceAppend, _ => DirstateMapWriteMode::Auto, // XXX should we error out? }; let result = inner.pack_v2(rust_write_mode); diff --git a/tests/test-dirstate-status-write-race.t b/tests/test-dirstate-status-write-race.t --- a/tests/test-dirstate-status-write-race.t +++ b/tests/test-dirstate-status-write-race.t @@ -93,7 +93,7 @@ Add some unknown files and refresh the d $ hg add dir/o $ hg remove dir/nested/m - $ hg st + $ hg st --config devel.dirstate.v2.data_update_mode=force-new A dir/o R dir/nested/m ? dir/n diff --git a/tests/test-dirstate.t b/tests/test-dirstate.t --- a/tests/test-dirstate.t +++ b/tests/test-dirstate.t @@ -211,8 +211,223 @@ Check that unused bytes counter is reset #endif +(non-Rust always rewrites) + +Test the devel option to control write behavior +============================================== + +Sometimes, debugging or testing the dirstate requires making sure that we have +done a complete rewrite of the data file and have no unreachable data around, +sometimes it requires we ensure we don't. + +We test the option to force this rewrite by creating the situation where an +append would happen and check that it doesn't happen. + + $ cd .. + $ hg init force-base + $ cd force-base + $ mkdir -p dir/nested dir2 + $ touch -t 200001010000 f dir/nested/a dir/b dir/c dir/d dir2/e dir/nested dir dir2 + $ hg commit -Aqm "recreate a bunch of files to facilitate append" + $ hg st --config devel.dirstate.v2.data_update_mode=force-new + $ cd .. + +#if dirstate-v2 + $ hg -R force-base debugstate --docket | grep unused + number of unused bytes: 0 + +Check with the option in "auto" mode +------------------------------------ + $ cp -a force-base append-mostly-no-force-rewrite + $ cd append-mostly-no-force-rewrite + $ current_uid=$(find_dirstate_uuid) + +Change mtime of dir on disk which will be recorded, causing a small enough change +to warrant only an append + + $ touch -t 202212010000 dir2 + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=auto + +UUID hasn't changed and a non-zero number of unused bytes means we've appended + + $ dirstate_uuid_has_not_changed + not testing because using Python implementation (no-rust no-rhg !) + +#if no-rust no-rhg +The pure python implementation never appends at the time this is written. + $ hg debugstate --docket | grep unused + number of unused bytes: 0 (known-bad-output !) +#else + $ hg debugstate --docket | grep unused + number of unused bytes: [1-9]\d* (re) +#endif + $ cd .. + +Check the same scenario with the option set to "force-new" +--------------------------------------------------------- + + $ cp -a force-base append-mostly-force-rewrite + $ cd append-mostly-force-rewrite + $ current_uid=$(find_dirstate_uuid) + +Change mtime of dir on disk which will be recorded, causing a small enough change +to warrant only an append, but we force the rewrite + + $ touch -t 202212010000 dir2 + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=force-new + +UUID has changed and zero unused bytes means a full-rewrite happened + + +#if no-rust no-rhg + $ dirstate_uuid_has_not_changed + not testing because using Python implementation +#else + $ dirstate_uuid_has_not_changed + [1] +#endif + $ hg debugstate --docket | grep unused + number of unused bytes: 0 + $ cd .. + + +Check the same scenario with the option set to "force-append" +------------------------------------------------------------- + +(should behave the same as "auto" here) + + $ cp -a force-base append-mostly-force-append + $ cd append-mostly-force-append + $ current_uid=$(find_dirstate_uuid) + +Change mtime of dir on disk which will be recorded, causing a small enough change +to warrant only an append, which we are forcing here anyway. + + $ touch -t 202212010000 dir2 + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=force-append + +UUID has not changed and some unused bytes exist in the data file + + $ dirstate_uuid_has_not_changed + not testing because using Python implementation (no-rust no-rhg !) + +#if no-rust no-rhg +The pure python implementation never appends at the time this is written. + $ hg debugstate --docket | grep unused + number of unused bytes: 0 (known-bad-output !) +#else + $ hg debugstate --docket | grep unused + number of unused bytes: [1-9]\d* (re) +#endif + $ cd .. + +Check with the option in "auto" mode +------------------------------------ + $ cp -a force-base append-mostly-no-force-rewrite + $ cd append-mostly-no-force-rewrite + $ current_uid=$(find_dirstate_uuid) + +Change mtime of everything on disk causing a full rewrite + + $ touch -t 202212010005 `hg files` + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=auto + +UUID has changed and zero unused bytes means we've rewritten. + +#if no-rust no-rhg + $ dirstate_uuid_has_not_changed + not testing because using Python implementation +#else + $ dirstate_uuid_has_not_changed + [1] +#endif + + $ hg debugstate --docket | grep unused + number of unused bytes: 0 (known-bad-output !) + $ cd .. + +Check the same scenario with the option set to "force-new" +--------------------------------------------------------- + +(should be the same as auto) + + $ cp -a force-base append-mostly-force-rewrite + $ cd append-mostly-force-rewrite + $ current_uid=$(find_dirstate_uuid) + +Change mtime of everything on disk causing a full rewrite + + $ touch -t 202212010005 `hg files` + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=force-new + +UUID has changed and a zero number unused bytes means we've rewritten. + + +#if no-rust no-rhg + $ dirstate_uuid_has_not_changed + not testing because using Python implementation +#else + $ dirstate_uuid_has_not_changed + [1] +#endif + $ hg debugstate --docket | grep unused + number of unused bytes: 0 + $ cd .. + + +Check the same scenario with the option set to "force-append" +------------------------------------------------------------- + +Should append even if "auto" did not + + $ cp -a force-base append-mostly-force-append + $ cd append-mostly-force-append + $ current_uid=$(find_dirstate_uuid) + +Change mtime of everything on disk causing a full rewrite + + $ touch -t 202212010005 `hg files` + $ hg st \ + > --config rhg.on-unsupported=abort \ + > --config devel.dirstate.v2.data_update_mode=force-append + +UUID has not changed and some unused bytes exist in the data file + + $ dirstate_uuid_has_not_changed + not testing because using Python implementation (no-rust no-rhg !) + +#if no-rust no-rhg +The pure python implementation is never appending at the time this is written. + $ hg debugstate --docket | grep unused + number of unused bytes: 0 (known-bad-output !) +#else + $ hg debugstate --docket | grep unused + number of unused bytes: [1-9]\d* (re) +#endif + $ cd .. + + + +Get back into a state suitable for the test of the file. + + $ cd ./append-mostly + +#else + $ cd ./u +#endif + Transaction compatibility -------------------------- +========================= The transaction preserves the dirstate. We should make sure all of it (docket + data) is preserved