##// END OF EJS Templates
dirstate-tree: Add the new `status()` algorithm...
Simon Sapin -
r47883:be579775 default
parent child Browse files
Show More
@@ -358,6 +358,7 b' dependencies = ['
358 "format-bytes",
358 "format-bytes",
359 "home",
359 "home",
360 "im-rc",
360 "im-rc",
361 "itertools",
361 "lazy_static",
362 "lazy_static",
362 "log",
363 "log",
363 "memmap",
364 "memmap",
@@ -14,6 +14,7 b' byteorder = "1.3.4"'
14 derive_more = "0.99"
14 derive_more = "0.99"
15 home = "0.5"
15 home = "0.5"
16 im-rc = "15.0.*"
16 im-rc = "15.0.*"
17 itertools = "0.9"
17 lazy_static = "1.4.0"
18 lazy_static = "1.4.0"
18 rand = "0.7.3"
19 rand = "0.7.3"
19 rand_pcg = "0.2.1"
20 rand_pcg = "0.2.1"
@@ -42,6 +42,19 b' impl DirstateEntry {'
42 pub fn is_from_other_parent(&self) -> bool {
42 pub fn is_from_other_parent(&self) -> bool {
43 self.state == EntryState::Normal && self.size == SIZE_FROM_OTHER_PARENT
43 self.state == EntryState::Normal && self.size == SIZE_FROM_OTHER_PARENT
44 }
44 }
45
46 // TODO: other platforms
47 #[cfg(unix)]
48 pub fn mode_changed(
49 &self,
50 filesystem_metadata: &std::fs::Metadata,
51 ) -> bool {
52 use std::os::unix::fs::MetadataExt;
53 const EXEC_BIT_MASK: u32 = 0o100;
54 let dirstate_exec_bit = (self.mode as u32) & EXEC_BIT_MASK;
55 let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
56 dirstate_exec_bit != fs_exec_bit
57 }
45 }
58 }
46
59
47 #[derive(BytesCast)]
60 #[derive(BytesCast)]
@@ -95,7 +95,7 b' pub enum Dispatch {'
95
95
96 type IoResult<T> = std::io::Result<T>;
96 type IoResult<T> = std::io::Result<T>;
97
97
98 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait, 'static>`, so add
98 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait + 'static>`, so add
99 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
99 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
100 pub type IgnoreFnType<'a> =
100 pub type IgnoreFnType<'a> =
101 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
101 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
@@ -255,7 +255,7 b' pub struct StatusOptions {'
255 pub collect_traversed_dirs: bool,
255 pub collect_traversed_dirs: bool,
256 }
256 }
257
257
258 #[derive(Debug)]
258 #[derive(Debug, Default)]
259 pub struct DirstateStatus<'a> {
259 pub struct DirstateStatus<'a> {
260 /// Tracked files whose contents have changed since the parent revision
260 /// Tracked files whose contents have changed since the parent revision
261 pub modified: Vec<HgPathCow<'a>>,
261 pub modified: Vec<HgPathCow<'a>>,
@@ -27,7 +27,7 b' use crate::StatusOptions;'
27 pub struct DirstateMap {
27 pub struct DirstateMap {
28 parents: Option<DirstateParents>,
28 parents: Option<DirstateParents>,
29 dirty_parents: bool,
29 dirty_parents: bool,
30 root: ChildNodes,
30 pub(super) root: ChildNodes,
31
31
32 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
32 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
33 nodes_with_entry_count: usize,
33 nodes_with_entry_count: usize,
@@ -42,17 +42,17 b' pub struct DirstateMap {'
42 /// path, so comparing full paths gives the same result as comparing base
42 /// path, so comparing full paths gives the same result as comparing base
43 /// names. However `BTreeMap` would waste time always re-comparing the same
43 /// names. However `BTreeMap` would waste time always re-comparing the same
44 /// string prefix.
44 /// string prefix.
45 type ChildNodes = BTreeMap<WithBasename<HgPathBuf>, Node>;
45 pub(super) type ChildNodes = BTreeMap<WithBasename<HgPathBuf>, Node>;
46
46
47 /// Represents a file or a directory
47 /// Represents a file or a directory
48 #[derive(Default)]
48 #[derive(Default)]
49 struct Node {
49 pub(super) struct Node {
50 /// `None` for directories
50 /// `None` for directories
51 entry: Option<DirstateEntry>,
51 pub(super) entry: Option<DirstateEntry>,
52
52
53 copy_source: Option<HgPathBuf>,
53 pub(super) copy_source: Option<HgPathBuf>,
54
54
55 children: ChildNodes,
55 pub(super) children: ChildNodes,
56
56
57 /// How many (non-inclusive) descendants of this node are tracked files
57 /// How many (non-inclusive) descendants of this node are tracked files
58 tracked_descendants_count: usize,
58 tracked_descendants_count: usize,
@@ -67,6 +67,10 b' impl Node {'
67 false
67 false
68 }
68 }
69 }
69 }
70
71 pub(super) fn state(&self) -> Option<EntryState> {
72 self.entry.as_ref().map(|entry| entry.state)
73 }
70 }
74 }
71
75
72 /// `(full_path, entry, copy_source)`
76 /// `(full_path, entry, copy_source)`
@@ -1,17 +1,379 b''
1 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate_tree::dirstate_map::ChildNodes;
1 use crate::dirstate_tree::dirstate_map::DirstateMap;
3 use crate::dirstate_tree::dirstate_map::DirstateMap;
4 use crate::dirstate_tree::dirstate_map::Node;
5 use crate::matchers::get_ignore_function;
2 use crate::matchers::Matcher;
6 use crate::matchers::Matcher;
7 use crate::utils::files::get_bytes_from_os_string;
8 use crate::utils::hg_path::HgPath;
3 use crate::DirstateStatus;
9 use crate::DirstateStatus;
10 use crate::EntryState;
11 use crate::HgPathBuf;
4 use crate::PatternFileWarning;
12 use crate::PatternFileWarning;
5 use crate::StatusError;
13 use crate::StatusError;
6 use crate::StatusOptions;
14 use crate::StatusOptions;
15 use std::borrow::Cow;
16 use std::io;
17 use std::path::Path;
7 use std::path::PathBuf;
18 use std::path::PathBuf;
8
19
9 pub fn status<'a>(
20 /// Returns the status of the working directory compared to its parent
10 _dmap: &'a mut DirstateMap,
21 /// changeset.
11 _matcher: &'a (dyn Matcher + Sync),
22 ///
12 _root_dir: PathBuf,
23 /// This algorithm is based on traversing the filesystem tree (`fs` in function
13 _ignore_files: Vec<PathBuf>,
24 /// and variable names) and dirstate tree at the same time. The core of this
14 _options: StatusOptions,
25 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
15 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError> {
26 /// and its use of `itertools::merge_join_by`. When reaching a path that only
16 todo!()
27 /// exists in one of the two trees, depending on information requested by
28 /// `options` we may need to traverse the remaining subtree.
29 pub fn status<'tree>(
30 dmap: &'tree mut DirstateMap,
31 matcher: &(dyn Matcher + Sync),
32 root_dir: PathBuf,
33 ignore_files: Vec<PathBuf>,
34 options: StatusOptions,
35 ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
36 let (ignore_fn, warnings): (IgnoreFnType, _) =
37 if options.list_ignored || options.list_unknown {
38 get_ignore_function(ignore_files, &root_dir)?
39 } else {
40 (Box::new(|&_| true), vec![])
41 };
42
43 let mut common = StatusCommon {
44 options,
45 matcher,
46 ignore_fn,
47 outcome: DirstateStatus::default(),
48 };
49 let is_at_repo_root = true;
50 let hg_path = HgPath::new("");
51 let has_ignored_ancestor = false;
52 common.traverse_fs_directory_and_dirstate(
53 has_ignored_ancestor,
54 &mut dmap.root,
55 hg_path,
56 &root_dir,
57 is_at_repo_root,
58 );
59 Ok((common.outcome, warnings))
60 }
61
62 /// Bag of random things needed by various parts of the algorithm. Reduces the
63 /// number of parameters passed to functions.
64 struct StatusCommon<'tree, 'a> {
65 options: StatusOptions,
66 matcher: &'a (dyn Matcher + Sync),
67 ignore_fn: IgnoreFnType<'a>,
68 outcome: DirstateStatus<'tree>,
17 }
69 }
70
71 impl<'tree, 'a> StatusCommon<'tree, 'a> {
72 fn traverse_fs_directory_and_dirstate(
73 &mut self,
74 has_ignored_ancestor: bool,
75 dirstate_nodes: &'tree mut ChildNodes,
76 directory_hg_path: &HgPath,
77 fs_path: &Path,
78 is_at_repo_root: bool,
79 ) {
80 // TODO: handle I/O errors
81 let mut fs_entries =
82 DirEntry::read_dir(fs_path, is_at_repo_root).unwrap();
83
84 // `merge_join_by` requires both its input iterators to be sorted:
85
86 // * `BTreeMap` iterates according to keys’ ordering by definition
87
88 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
89 // https://github.com/rust-lang/rust/issues/34162
90 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
91
92 for pair in itertools::merge_join_by(
93 dirstate_nodes,
94 &fs_entries,
95 |(full_path, _node), fs_entry| {
96 full_path.base_name().cmp(&fs_entry.base_name)
97 },
98 ) {
99 use itertools::EitherOrBoth::*;
100 match pair {
101 Both((hg_path, dirstate_node), fs_entry) => {
102 self.traverse_fs_and_dirstate(
103 fs_entry,
104 hg_path.full_path(),
105 dirstate_node,
106 has_ignored_ancestor,
107 );
108 }
109 Left((hg_path, dirstate_node)) => self.traverse_dirstate_only(
110 hg_path.full_path(),
111 dirstate_node,
112 ),
113 Right(fs_entry) => self.traverse_fs_only(
114 has_ignored_ancestor,
115 directory_hg_path,
116 fs_entry,
117 ),
118 }
119 }
120 }
121
122 fn traverse_fs_and_dirstate(
123 &mut self,
124 fs_entry: &DirEntry,
125 hg_path: &'tree HgPath,
126 dirstate_node: &'tree mut Node,
127 has_ignored_ancestor: bool,
128 ) {
129 if fs_entry.metadata.is_dir() {
130 if self.options.collect_traversed_dirs {
131 self.outcome.traversed.push(hg_path.into())
132 }
133 // If we previously had a file here, it was removed (with
134 // `hg rm` or similar) or deleted before it could be
135 // replaced by a directory.
136 self.mark_removed_or_deleted_if_file(
137 hg_path,
138 dirstate_node.state(),
139 );
140 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
141 let is_at_repo_root = false;
142 self.traverse_fs_directory_and_dirstate(
143 is_ignored,
144 &mut dirstate_node.children,
145 hg_path,
146 &fs_entry.full_path,
147 is_at_repo_root,
148 );
149 } else {
150 if self.matcher.matches(hg_path) {
151 let full_path = Cow::from(hg_path);
152 if let Some(entry) = &dirstate_node.entry {
153 match entry.state {
154 EntryState::Added => {
155 self.outcome.added.push(full_path)
156 }
157 EntryState::Removed => {
158 self.outcome.removed.push(full_path)
159 }
160 EntryState::Merged => {
161 self.outcome.modified.push(full_path)
162 }
163 EntryState::Normal => {
164 self.handle_normal_file(
165 full_path,
166 dirstate_node,
167 entry,
168 fs_entry,
169 );
170 }
171 // This variant is not used in DirstateMap
172 // nodes
173 EntryState::Unknown => unreachable!(),
174 }
175 } else {
176 // `node.entry.is_none()` indicates a "directory"
177 // node, but the filesystem has a file
178 self.mark_unknown_or_ignored(
179 has_ignored_ancestor,
180 full_path,
181 )
182 }
183 }
184
185 for (child_hg_path, child_node) in &mut dirstate_node.children {
186 self.traverse_dirstate_only(
187 child_hg_path.full_path(),
188 child_node,
189 )
190 }
191 }
192 }
193
194 /// A file with `EntryState::Normal` in the dirstate was found in the
195 /// filesystem
196 fn handle_normal_file(
197 &mut self,
198 full_path: Cow<'tree, HgPath>,
199 dirstate_node: &Node,
200 entry: &crate::DirstateEntry,
201 fs_entry: &DirEntry,
202 ) {
203 // Keep the low 31 bits
204 fn truncate_u64(value: u64) -> i32 {
205 (value & 0x7FFF_FFFF) as i32
206 }
207 fn truncate_i64(value: i64) -> i32 {
208 (value & 0x7FFF_FFFF) as i32
209 }
210
211 let mode_changed = || {
212 self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
213 };
214 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
215 if entry.size >= 0
216 && size_changed
217 && fs_entry.metadata.file_type().is_symlink()
218 {
219 // issue6456: Size returned may be longer due to encryption
220 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
221 self.outcome.unsure.push(full_path)
222 } else if dirstate_node.copy_source.is_some()
223 || entry.is_from_other_parent()
224 || (entry.size >= 0 && (size_changed || mode_changed()))
225 {
226 self.outcome.modified.push(full_path)
227 } else {
228 let mtime = mtime_seconds(&fs_entry.metadata);
229 if truncate_i64(mtime) != entry.mtime
230 || mtime == self.options.last_normal_time
231 {
232 self.outcome.unsure.push(full_path)
233 } else if self.options.list_clean {
234 self.outcome.clean.push(full_path)
235 }
236 }
237 }
238
239 /// A node in the dirstate tree has no corresponding filesystem entry
240 fn traverse_dirstate_only(
241 &mut self,
242 hg_path: &'tree HgPath,
243 dirstate_node: &'tree mut Node,
244 ) {
245 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state());
246 for (child_hg_path, child_node) in &mut dirstate_node.children {
247 self.traverse_dirstate_only(child_hg_path.full_path(), child_node)
248 }
249 }
250
251 /// A node in the dirstate tree has no corresponding *file* on the
252 /// filesystem
253 ///
254 /// Does nothing on a "directory" node
255 fn mark_removed_or_deleted_if_file(
256 &mut self,
257 hg_path: &'tree HgPath,
258 dirstate_node_state: Option<EntryState>,
259 ) {
260 if let Some(state) = dirstate_node_state {
261 if self.matcher.matches(hg_path) {
262 if let EntryState::Removed = state {
263 self.outcome.removed.push(hg_path.into())
264 } else {
265 self.outcome.deleted.push(hg_path.into())
266 }
267 }
268 }
269 }
270
271 /// Something in the filesystem has no corresponding dirstate node
272 fn traverse_fs_only(
273 &mut self,
274 has_ignored_ancestor: bool,
275 directory_hg_path: &HgPath,
276 fs_entry: &DirEntry,
277 ) {
278 let hg_path = directory_hg_path.join(&fs_entry.base_name);
279 if fs_entry.metadata.is_dir() {
280 let is_ignored =
281 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
282 let traverse_children = if is_ignored {
283 // Descendants of an ignored directory are all ignored
284 self.options.list_ignored
285 } else {
286 // Descendants of an unknown directory may be either unknown or
287 // ignored
288 self.options.list_unknown || self.options.list_ignored
289 };
290 if traverse_children {
291 let is_at_repo_root = false;
292 // TODO: handle I/O errors
293 let children_fs_entries =
294 DirEntry::read_dir(&fs_entry.full_path, is_at_repo_root)
295 .unwrap();
296 for child_fs_entry in children_fs_entries {
297 self.traverse_fs_only(
298 is_ignored,
299 &hg_path,
300 &child_fs_entry,
301 )
302 }
303 }
304 if self.options.collect_traversed_dirs {
305 self.outcome.traversed.push(hg_path.into())
306 }
307 } else if self.matcher.matches(&hg_path) {
308 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
309 }
310 }
311
312 fn mark_unknown_or_ignored(
313 &mut self,
314 has_ignored_ancestor: bool,
315 hg_path: Cow<'tree, HgPath>,
316 ) {
317 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
318 if is_ignored {
319 if self.options.list_ignored {
320 self.outcome.ignored.push(hg_path)
321 }
322 } else {
323 if self.options.list_unknown {
324 self.outcome.unknown.push(hg_path)
325 }
326 }
327 }
328 }
329
330 #[cfg(unix)] // TODO
331 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
332 // Going through `Metadata::modified()` would be portable, but would take
333 // care to construct a `SystemTime` value with sub-second precision just
334 // for us to throw that away here.
335 use std::os::unix::fs::MetadataExt;
336 metadata.mtime()
337 }
338
339 struct DirEntry {
340 base_name: HgPathBuf,
341 full_path: PathBuf,
342 metadata: std::fs::Metadata,
343 }
344
345 impl DirEntry {
346 /// Returns **unsorted** entries in the given directory, with name and
347 /// metadata.
348 ///
349 /// If a `.hg` sub-directory is encountered:
350 ///
351 /// * At the repository root, ignore that sub-directory
352 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
353 /// list instead.
354 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
355 let mut results = Vec::new();
356 for entry in path.read_dir()? {
357 let entry = entry?;
358 let metadata = entry.metadata()?;
359 let name = get_bytes_from_os_string(entry.file_name());
360 // FIXME don't do this when cached
361 if name == b".hg" {
362 if is_at_repo_root {
363 // Skip the repo’s own .hg (might be a symlink)
364 continue;
365 } else if metadata.is_dir() {
366 // A .hg sub-directory at another location means a subrepo,
367 // skip it entirely.
368 return Ok(Vec::new());
369 }
370 }
371 results.push(DirEntry {
372 base_name: name.into(),
373 full_path: entry.path(),
374 metadata,
375 })
376 }
377 Ok(results)
378 }
379 }
@@ -17,7 +17,7 b' use crate::utils::{'
17 use lazy_static::lazy_static;
17 use lazy_static::lazy_static;
18 use same_file::is_same_file;
18 use same_file::is_same_file;
19 use std::borrow::{Cow, ToOwned};
19 use std::borrow::{Cow, ToOwned};
20 use std::ffi::OsStr;
20 use std::ffi::{OsStr, OsString};
21 use std::fs::Metadata;
21 use std::fs::Metadata;
22 use std::iter::FusedIterator;
22 use std::iter::FusedIterator;
23 use std::ops::Deref;
23 use std::ops::Deref;
@@ -53,6 +53,12 b' pub fn get_bytes_from_os_str(str: impl A'
53 str.as_ref().as_bytes().to_vec()
53 str.as_ref().as_bytes().to_vec()
54 }
54 }
55
55
56 #[cfg(unix)]
57 pub fn get_bytes_from_os_string(str: OsString) -> Vec<u8> {
58 use std::os::unix::ffi::OsStringExt;
59 str.into_vec()
60 }
61
56 /// An iterator over repository path yielding itself and its ancestors.
62 /// An iterator over repository path yielding itself and its ancestors.
57 #[derive(Copy, Clone, Debug)]
63 #[derive(Copy, Clone, Debug)]
58 pub struct Ancestors<'a> {
64 pub struct Ancestors<'a> {
@@ -6,6 +6,7 b''
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use std::borrow::Borrow;
8 use std::borrow::Borrow;
9 use std::borrow::Cow;
9 use std::convert::TryFrom;
10 use std::convert::TryFrom;
10 use std::ffi::{OsStr, OsString};
11 use std::ffi::{OsStr, OsString};
11 use std::fmt;
12 use std::fmt;
@@ -535,6 +536,24 b' impl TryFrom<PathBuf> for HgPathBuf {'
535 }
536 }
536 }
537 }
537
538
539 impl From<HgPathBuf> for Cow<'_, HgPath> {
540 fn from(path: HgPathBuf) -> Self {
541 Cow::Owned(path)
542 }
543 }
544
545 impl<'a> From<&'a HgPath> for Cow<'a, HgPath> {
546 fn from(path: &'a HgPath) -> Self {
547 Cow::Borrowed(path)
548 }
549 }
550
551 impl<'a> From<&'a HgPathBuf> for Cow<'a, HgPath> {
552 fn from(path: &'a HgPathBuf) -> Self {
553 Cow::Borrowed(&**path)
554 }
555 }
556
538 #[cfg(test)]
557 #[cfg(test)]
539 mod tests {
558 mod tests {
540 use super::*;
559 use super::*;
General Comments 0
You need to be logged in to leave comments. Login now