# HG changeset patch # User Simon Sapin # Date 2021-07-16 12:08:26 # Node ID 284a20269a97501806d3ba90c92f0ddbf7eb7a7c # Parent 78f7f0d490ee2e064c6dbab5a9e2aec4b448e9c7 dirstate-v2: Separate iterators for dirfoldmap and debugdirstate `dirstatemap.dirfoldmap` was recently changed to re-use a Rust iterator that was added for the `hg debugdirstate` command. That iterator was based on all nodes in the tree dirstate without an entry only existing to hold child nodes, and therefore being directories. However to optimize status further we may want to store additional nodes for unknown or ignored files and directories. At that point the two users of this iterator will want different things, so let’s make two iterators instead. See doc-comments in `dispatch.rs`. Differential Revision: https://phab.mercurial-scm.org/D11099 diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -942,7 +942,12 @@ def debugdeltachain(ui, repo, file_=None ), (b'', b'dates', True, _(b'display the saved mtime')), (b'', b'datesort', None, _(b'sort by saved mtime')), - (b'', b'dirs', False, _(b'display directories')), + ( + b'', + b'all', + False, + _(b'display dirstate-v2 tree nodes that would not exist in v1'), + ), ], _(b'[OPTION]...'), ) @@ -961,9 +966,10 @@ def debugstate(ui, repo, **opts): ) # sort by mtime, then by filename else: keyfunc = None # sort by filename - entries = list(pycompat.iteritems(repo.dirstate)) - if opts['dirs']: - entries.extend(repo.dirstate.directories()) + if opts['all']: + entries = list(repo.dirstate._map.debug_iter()) + else: + entries = list(pycompat.iteritems(repo.dirstate)) entries.sort(key=keyfunc) for file_, ent in entries: if ent.v1_mtime() == -1: diff --git a/mercurial/dirstatemap.py b/mercurial/dirstatemap.py --- a/mercurial/dirstatemap.py +++ b/mercurial/dirstatemap.py @@ -105,10 +105,6 @@ class dirstatemap(object): self._map return self.copymap - def directories(self): - # Rust / dirstate-v2 only - return [] - def clear(self): self._map.clear() self.copymap.clear() @@ -126,6 +122,8 @@ class dirstatemap(object): # forward for python2,3 compat iteritems = items + debug_iter = items + def __len__(self): return len(self._map) @@ -525,6 +523,9 @@ if rustmod is not None: def directories(self): return self._rustmap.directories() + def debug_iter(self): + return self._rustmap.debug_iter() + def preload(self): self._rustmap @@ -746,6 +747,6 @@ if rustmod is not None: def dirfoldmap(self): f = {} normcase = util.normcase - for name, _pseudo_entry in self.directories(): + for name in self._rustmap.tracked_dirs(): f[normcase(name)] = name return f diff --git a/rust/hg-core/src/dirstate.rs b/rust/hg-core/src/dirstate.rs --- a/rust/hg-core/src/dirstate.rs +++ b/rust/hg-core/src/dirstate.rs @@ -65,6 +65,12 @@ impl DirstateEntry { let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK; dirstate_exec_bit != fs_exec_bit } + + /// Returns a `(state, mode, size, mtime)` tuple as for + /// `DirstateMapMethods::debug_iter`. + pub fn debug_tuple(&self) -> (u8, i32, i32, i32) { + (self.state.into(), self.mode, self.size, self.mtime) + } } #[derive(BytesCast)] diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs @@ -1246,27 +1246,50 @@ impl<'on_disk> super::dispatch::Dirstate })) } - fn iter_directories( + fn iter_tracked_dirs( + &mut self, + ) -> Result< + Box< + dyn Iterator> + + Send + + '_, + >, + DirstateError, + > { + let on_disk = self.on_disk; + Ok(Box::new(filter_map_results( + self.iter_nodes(), + move |node| { + Ok(if node.tracked_descendants_count() > 0 { + Some(node.full_path(on_disk)?) + } else { + None + }) + }, + ))) + } + + fn debug_iter( &self, ) -> Box< dyn Iterator< Item = Result< - (&HgPath, Option), + (&HgPath, (u8, i32, i32, i32)), DirstateV2ParseError, >, > + Send + '_, > { - Box::new(filter_map_results(self.iter_nodes(), move |node| { - Ok(if node.state()?.is_none() { - Some(( - node.full_path(self.on_disk)?, - node.cached_directory_mtime() - .map(|mtime| Timestamp(mtime.seconds())), - )) + Box::new(self.iter_nodes().map(move |node| { + let node = node?; + let debug_tuple = if let Some(entry) = node.entry()? { + entry.debug_tuple() + } else if let Some(mtime) = node.cached_directory_mtime() { + (b' ', 0, -1, mtime.seconds() as i32) } else { - None - }) + (b' ', 0, -1, -1) + }; + Ok((node.full_path(self.on_disk)?, debug_tuple)) })) } } diff --git a/rust/hg-core/src/dirstate_tree/dispatch.rs b/rust/hg-core/src/dirstate_tree/dispatch.rs --- a/rust/hg-core/src/dirstate_tree/dispatch.rs +++ b/rust/hg-core/src/dirstate_tree/dispatch.rs @@ -259,20 +259,40 @@ pub trait DirstateMapMethods { /// are `Result`s. fn iter(&self) -> StateMapIter<'_>; - /// In the tree dirstate, return an iterator of "directory" (entry-less) - /// nodes with the data stored for them. This is for `hg debugdirstate - /// --dirs`. + /// Returns an iterator of tracked directories. /// - /// In the flat dirstate, returns an empty iterator. + /// This is the paths for which `has_tracked_dir` would return true. + /// Or, in other words, the union of ancestor paths of all paths that have + /// an associated entry in a "tracked" state in this dirstate map. /// /// Because parse errors can happen during iteration, the iterated items /// are `Result`s. - fn iter_directories( + fn iter_tracked_dirs( + &mut self, + ) -> Result< + Box< + dyn Iterator> + + Send + + '_, + >, + DirstateError, + >; + + /// Return an iterator of `(path, (state, mode, size, mtime))` for every + /// node stored in this dirstate map, for the purpose of the `hg + /// debugdirstate` command. + /// + /// For nodes that don’t have an entry, `state` is the ASCII space. + /// An `mtime` may still be present. It is used to optimize `status`. + /// + /// Because parse errors can happen during iteration, the iterated items + /// are `Result`s. + fn debug_iter( &self, ) -> Box< dyn Iterator< Item = Result< - (&HgPath, Option), + (&HgPath, (u8, i32, i32, i32)), DirstateV2ParseError, >, > + Send @@ -476,17 +496,41 @@ impl DirstateMapMethods for DirstateMap Box::new((&**self).iter().map(|(key, value)| Ok((&**key, *value)))) } - fn iter_directories( + fn iter_tracked_dirs( + &mut self, + ) -> Result< + Box< + dyn Iterator> + + Send + + '_, + >, + DirstateError, + > { + self.set_all_dirs()?; + Ok(Box::new( + self.all_dirs + .as_ref() + .unwrap() + .iter() + .map(|path| Ok(&**path)), + )) + } + + fn debug_iter( &self, ) -> Box< dyn Iterator< Item = Result< - (&HgPath, Option), + (&HgPath, (u8, i32, i32, i32)), DirstateV2ParseError, >, > + Send + '_, > { - Box::new(std::iter::empty()) + Box::new( + (&**self) + .iter() + .map(|(path, entry)| Ok((&**path, entry.debug_tuple()))), + ) } } diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs --- a/rust/hg-cpython/src/dirstate.rs +++ b/rust/hg-cpython/src/dirstate.rs @@ -52,9 +52,6 @@ pub fn make_dirstate_item( py: Python, entry: &DirstateEntry, ) -> PyResult { - // might be silly to retrieve capsule function in hot loop - let make = make_dirstate_item_capi::retrieve(py)?; - let &DirstateEntry { state, mode, @@ -65,22 +62,19 @@ pub fn make_dirstate_item( // because Into has a specific implementation while `as c_char` would // just do a naive enum cast. let state_code: u8 = state.into(); - - let maybe_obj = unsafe { - let ptr = make(state_code as c_char, mode, size, mtime); - PyObject::from_owned_ptr_opt(py, ptr) - }; - maybe_obj.ok_or_else(|| PyErr::fetch(py)) + make_dirstate_item_raw(py, state_code, mode, size, mtime) } -// XXX a bit strange to have a dedicated function, but directory are not -// treated as dirstate node by hg-core for now so… -pub fn make_directory_item(py: Python, mtime: i32) -> PyResult { - // might be silly to retrieve capsule function in hot loop +pub fn make_dirstate_item_raw( + py: Python, + state: u8, + mode: i32, + size: i32, + mtime: i32, +) -> PyResult { let make = make_dirstate_item_capi::retrieve(py)?; - let maybe_obj = unsafe { - let ptr = make(b'd' as c_char, 0 as i32, 0 as i32, mtime); + let ptr = make(state as c_char, mode, size, mtime); PyObject::from_owned_ptr_opt(py, ptr) }; maybe_obj.ok_or_else(|| PyErr::fetch(py)) diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -19,8 +19,8 @@ use cpython::{ use crate::{ dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator}, - dirstate::make_directory_item, dirstate::make_dirstate_item, + dirstate::make_dirstate_item_raw, dirstate::non_normal_entries::{ NonNormalEntries, NonNormalEntriesIterator, }, @@ -61,17 +61,14 @@ py_class!(pub class DirstateMap |py| { use_dirstate_tree: bool, on_disk: PyBytes, ) -> PyResult { - let dirstate_error = |e: DirstateError| { - PyErr::new::(py, format!("Dirstate error: {:?}", e)) - }; let (inner, parents) = if use_dirstate_tree { let (map, parents) = OwningDirstateMap::new_v1(py, on_disk) - .map_err(dirstate_error)?; + .map_err(|e| dirstate_error(py, e))?; (Box::new(map) as _, parents) } else { let bytes = on_disk.data(py); let mut map = RustDirstateMap::default(); - let parents = map.read(bytes).map_err(dirstate_error)?; + let parents = map.read(bytes).map_err(|e| dirstate_error(py, e))?; (Box::new(map) as _, parents) }; let map = Self::create_instance(py, inner)?; @@ -550,19 +547,29 @@ py_class!(pub class DirstateMap |py| { ) } - def directories(&self) -> PyResult { + def tracked_dirs(&self) -> PyResult { let dirs = PyList::new(py, &[]); - for item in self.inner(py).borrow().iter_directories() { - let (path, mtime) = item.map_err(|e| v2_error(py, e))?; + for path in self.inner(py).borrow_mut().iter_tracked_dirs() + .map_err(|e |dirstate_error(py, e))? + { + let path = path.map_err(|e| v2_error(py, e))?; let path = PyBytes::new(py, path.as_bytes()); - let mtime = mtime.map(|t| t.0).unwrap_or(-1); - let item = make_directory_item(py, mtime as i32)?; - let tuple = (path, item); - dirs.append(py, tuple.to_py_object(py).into_object()) + dirs.append(py, path.into_object()) } Ok(dirs) } + def debug_iter(&self) -> PyResult { + let dirs = PyList::new(py, &[]); + for item in self.inner(py).borrow().debug_iter() { + let (path, (state, mode, size, mtime)) = + item.map_err(|e| v2_error(py, e))?; + let path = PyBytes::new(py, path.as_bytes()); + let item = make_dirstate_item_raw(py, state, mode, size, mtime)?; + dirs.append(py, (path, item).to_py_object(py).into_object()) + } + Ok(dirs) + } }); impl DirstateMap { @@ -616,3 +623,7 @@ fn extract_node_id(py: Python, obj: &PyO pub(super) fn v2_error(py: Python<'_>, _: DirstateV2ParseError) -> PyErr { PyErr::new::(py, "corrupted dirstate-v2") } + +fn dirstate_error(py: Python<'_>, e: DirstateError) -> PyErr { + PyErr::new::(py, format!("Dirstate error: {:?}", e)) +} diff --git a/rust/hg-cpython/src/dirstate/dispatch.rs b/rust/hg-cpython/src/dirstate/dispatch.rs --- a/rust/hg-cpython/src/dirstate/dispatch.rs +++ b/rust/hg-cpython/src/dirstate/dispatch.rs @@ -203,17 +203,30 @@ impl DirstateMapMethods for OwningDirsta self.get().iter() } - fn iter_directories( + fn iter_tracked_dirs( + &mut self, + ) -> Result< + Box< + dyn Iterator> + + Send + + '_, + >, + DirstateError, + > { + self.get_mut().iter_tracked_dirs() + } + + fn debug_iter( &self, ) -> Box< dyn Iterator< Item = Result< - (&HgPath, Option), + (&HgPath, (u8, i32, i32, i32)), DirstateV2ParseError, >, > + Send + '_, > { - self.get().iter_directories() + self.get().debug_iter() } } diff --git a/tests/test-completion.t b/tests/test-completion.t --- a/tests/test-completion.t +++ b/tests/test-completion.t @@ -284,7 +284,7 @@ Show all commands + options debugdate: extended debugdeltachain: changelog, manifest, dir, template debugdirstateignorepatternshash: - debugdirstate: nodates, dates, datesort, dirs + debugdirstate: nodates, dates, datesort, all debugdiscovery: old, nonheads, rev, seed, local-as-revs, remote-as-revs, ssh, remotecmd, insecure, template debugdownload: output debugextensions: template diff --git a/tests/test-status.t b/tests/test-status.t --- a/tests/test-status.t +++ b/tests/test-status.t @@ -929,23 +929,23 @@ Check read_dir caching The cached mtime is initially unset - $ hg debugdirstate --dirs --no-dates | grep '^d' - d 0 0 unset subdir + $ hg debugdirstate --all --no-dates | grep '^ ' + 0 -1 unset subdir It is still not set when there are unknown files $ touch subdir/unknown $ hg status ? subdir/unknown - $ hg debugdirstate --dirs --no-dates | grep '^d' - d 0 0 unset subdir + $ hg debugdirstate --all --no-dates | grep '^ ' + 0 -1 unset subdir Now the directory is eligible for caching, so its mtime is save in the dirstate $ rm subdir/unknown $ hg status - $ hg debugdirstate --dirs --no-dates | grep '^d' - d 0 0 set subdir + $ hg debugdirstate --all --no-dates | grep '^ ' + 0 -1 set subdir This time the command should be ever so slightly faster since it does not need `read_dir("subdir")` @@ -963,11 +963,11 @@ Creating a new file changes the directory’s mtime, invalidating the cache Removing a node from the dirstate resets the cache for its parent directory $ hg forget subdir/a - $ hg debugdirstate --dirs --no-dates | grep '^d' - d 0 0 set subdir + $ hg debugdirstate --all --no-dates | grep '^ ' + 0 -1 set subdir $ hg ci -qm '#1' - $ hg debugdirstate --dirs --no-dates | grep '^d' - d 0 0 unset subdir + $ hg debugdirstate --all --no-dates | grep '^ ' + 0 -1 unset subdir $ hg status ? subdir/a