##// END OF EJS Templates
rust: add `dirstate_tree` module...
Raphaël Gomès -
r46136:b51167d7 default
parent child Browse files
Show More
@@ -0,0 +1,14 b''
1 // dirstate_tree.rs
2 //
3 // Copyright 2020, Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 //! Special-case radix tree that matches a filesystem hierarchy for use in the
9 //! dirstate.
10 //! It has not been optimized at all yet.
11
12 pub mod iter;
13 pub mod node;
14 pub mod tree;
@@ -0,0 +1,358 b''
1 // iter.rs
2 //
3 // Copyright 2020, Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 use super::node::{Node, NodeKind};
9 use super::tree::Tree;
10 use crate::dirstate::dirstate_tree::node::Directory;
11 use crate::dirstate::status::Dispatch;
12 use crate::utils::hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf};
13 use crate::DirstateEntry;
14 use std::borrow::Cow;
15 use std::collections::VecDeque;
16 use std::iter::{FromIterator, FusedIterator};
17 use std::path::PathBuf;
18
19 impl FromIterator<(HgPathBuf, DirstateEntry)> for Tree {
20 fn from_iter<T: IntoIterator<Item = (HgPathBuf, DirstateEntry)>>(iter: T) -> Self {
21 let mut tree = Self::new();
22 for (path, entry) in iter {
23 tree.insert(path, entry);
24 }
25 tree
26 }
27 }
28
29 /// Iterator of all entries in the dirstate tree.
30 ///
31 /// It has no particular ordering.
32 pub struct Iter<'a> {
33 to_visit: VecDeque<(Cow<'a, [u8]>, &'a Node)>,
34 }
35
36 impl<'a> Iter<'a> {
37 pub fn new(node: &'a Node) -> Iter<'a> {
38 let mut to_visit = VecDeque::new();
39 to_visit.push_back((Cow::Borrowed(&b""[..]), node));
40 Self { to_visit }
41 }
42 }
43
44 impl<'a> Iterator for Iter<'a> {
45 type Item = (HgPathBuf, DirstateEntry);
46
47 fn next(&mut self) -> Option<Self::Item> {
48 while let Some((base_path, node)) = self.to_visit.pop_front() {
49 match &node.kind {
50 NodeKind::Directory(dir) => {
51 add_children_to_visit(&mut self.to_visit, &base_path, &dir);
52 if let Some(file) = &dir.was_file {
53 return Some((HgPathBuf::from_bytes(&base_path), file.entry));
54 }
55 }
56 NodeKind::File(file) => {
57 if let Some(dir) = &file.was_directory {
58 add_children_to_visit(&mut self.to_visit, &base_path, &dir);
59 }
60 return Some((HgPathBuf::from_bytes(&base_path), file.entry));
61 }
62 }
63 }
64 None
65 }
66 }
67
68 impl<'a> FusedIterator for Iter<'a> {}
69
70 /// Iterator of all entries in the dirstate tree, with a special filesystem
71 /// handling for the directories containing said entries.
72 ///
73 /// It checks every directory on-disk to see if it has become a symlink, to
74 /// prevent a potential security issue.
75 /// Using this information, it may dispatch `status` information early: it
76 /// returns canonical paths along with `Shortcut`s, which are either a
77 /// `DirstateEntry` or a `Dispatch`, if the fate of said path has already been
78 /// determined.
79 ///
80 /// Like `Iter`, it has no particular ordering.
81 pub struct FsIter<'a> {
82 root_dir: PathBuf,
83 to_visit: VecDeque<(Cow<'a, [u8]>, &'a Node)>,
84 shortcuts: VecDeque<(HgPathBuf, StatusShortcut)>,
85 }
86
87 impl<'a> FsIter<'a> {
88 pub fn new(node: &'a Node, root_dir: PathBuf) -> FsIter<'a> {
89 let mut to_visit = VecDeque::new();
90 to_visit.push_back((Cow::Borrowed(&b""[..]), node));
91 Self {
92 root_dir,
93 to_visit,
94 shortcuts: Default::default(),
95 }
96 }
97
98 /// Mercurial tracks symlinks but *not* what they point to.
99 /// If a directory is moved and symlinked:
100 ///
101 /// ```bash
102 /// $ mkdir foo
103 /// $ touch foo/a
104 /// $ # commit...
105 /// $ mv foo bar
106 /// $ ln -s bar foo
107 /// ```
108 /// We need to dispatch the new symlink as `Unknown` and all the
109 /// descendents of the directory it replace as `Deleted`.
110 fn dispatch_symlinked_directory(&mut self, path: impl AsRef<HgPath>, node: &Node) {
111 let path = path.as_ref();
112 self.shortcuts
113 .push_back((path.to_owned(), StatusShortcut::Dispatch(Dispatch::Unknown)));
114 for (file, _) in node.iter() {
115 self.shortcuts.push_back((
116 path.join(&file),
117 StatusShortcut::Dispatch(Dispatch::Deleted),
118 ));
119 }
120 }
121
122 /// Returns `true` if the canonical `path` of a directory corresponds to a
123 /// symlink on disk. It means it was moved and symlinked after the last
124 /// dirstate update.
125 ///
126 /// # Special cases
127 ///
128 /// Returns `false` for the repository root.
129 /// Returns `false` on io error, error handling is outside of the iterator.
130 fn directory_became_symlink(&mut self, path: &HgPath) -> bool {
131 if path.is_empty() {
132 return false;
133 }
134 let filename_as_path = match hg_path_to_path_buf(&path) {
135 Ok(p) => p,
136 _ => return false,
137 };
138 let meta = self.root_dir.join(filename_as_path).symlink_metadata();
139 match meta {
140 Ok(ref m) if m.file_type().is_symlink() => true,
141 _ => return false,
142 }
143 }
144 }
145
146 /// Returned by `FsIter`, since the `Dispatch` of any given entry may already
147 /// be determined during the iteration. This is necessary for performance
148 /// reasons, since hierarchical information is needed to `Dispatch` an entire
149 /// subtree efficiently.
150 #[derive(Debug, Copy, Clone)]
151 pub enum StatusShortcut {
152 /// A entry in the dirstate for further inspection
153 Entry(DirstateEntry),
154 /// The result of the status of the corresponding file
155 Dispatch(Dispatch),
156 }
157
158 impl<'a> Iterator for FsIter<'a> {
159 type Item = (HgPathBuf, StatusShortcut);
160
161 fn next(&mut self) -> Option<Self::Item> {
162 // If any paths have already been `Dispatch`-ed, return them
163 while let Some(res) = self.shortcuts.pop_front() {
164 return Some(res);
165 }
166
167 while let Some((base_path, node)) = self.to_visit.pop_front() {
168 match &node.kind {
169 NodeKind::Directory(dir) => {
170 let canonical_path = HgPath::new(&base_path);
171 if self.directory_became_symlink(canonical_path) {
172 // Potential security issue, don't do a normal
173 // traversal, force the results.
174 self.dispatch_symlinked_directory(canonical_path, &node);
175 continue;
176 }
177 add_children_to_visit(&mut self.to_visit, &base_path, &dir);
178 if let Some(file) = &dir.was_file {
179 return Some((
180 HgPathBuf::from_bytes(&base_path),
181 StatusShortcut::Entry(file.entry),
182 ));
183 }
184 }
185 NodeKind::File(file) => {
186 if let Some(dir) = &file.was_directory {
187 add_children_to_visit(&mut self.to_visit, &base_path, &dir);
188 }
189 return Some((
190 HgPathBuf::from_bytes(&base_path),
191 StatusShortcut::Entry(file.entry),
192 ));
193 }
194 }
195 }
196
197 None
198 }
199 }
200
201 impl<'a> FusedIterator for FsIter<'a> {}
202
/// Joins `path` and `other` with a `/` separator.
///
/// When `path` is empty, `other` is returned borrowed and no separator is
/// added; otherwise a new buffer holding `path`, `/` and `other` is
/// allocated.
fn join_path<'a, 'b>(path: &'a [u8], other: &'b [u8]) -> Cow<'b, [u8]> {
    if path.is_empty() {
        return Cow::Borrowed(other);
    }
    let mut joined = Vec::with_capacity(path.len() + 1 + other.len());
    joined.extend_from_slice(path);
    joined.push(b'/');
    joined.extend_from_slice(other);
    Cow::Owned(joined)
}
210
211 /// Adds all children of a given directory `dir` to the visit queue `to_visit`
212 /// prefixed by a `base_path`.
213 fn add_children_to_visit<'a>(
214 to_visit: &mut VecDeque<(Cow<'a, [u8]>, &'a Node)>,
215 base_path: &[u8],
216 dir: &'a Directory,
217 ) {
218 to_visit.extend(dir.children.iter().map(|(path, child)| {
219 let full_path = join_path(&base_path, &path);
220 (Cow::from(full_path), child)
221 }));
222 }
223
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::hg_path::HgPath;
    use crate::{EntryState, FastHashMap};
    use std::collections::HashSet;

    /// Checks that `Tree::iter` yields every inserted entry exactly once,
    /// including entries kept around as `was_file` / `was_directory`.
    #[test]
    fn test_iteration() {
        // `DirstateEntry` is `Copy`, so this one is reused for every plain
        // file inserted below.
        let normal = DirstateEntry {
            state: EntryState::Normal,
            mode: 0,
            mtime: 0,
            size: 0,
        };

        let mut tree = Tree::new();

        assert_eq!(
            tree.insert(
                HgPathBuf::from_bytes(b"foo/bar"),
                DirstateEntry {
                    state: EntryState::Merged,
                    mode: 41,
                    mtime: 42,
                    size: 43,
                }
            ),
            None
        );

        assert_eq!(
            tree.insert(
                HgPathBuf::from_bytes(b"foo2"),
                DirstateEntry {
                    state: EntryState::Merged,
                    mode: 40,
                    mtime: 41,
                    size: 42,
                }
            ),
            None
        );

        assert_eq!(tree.insert(HgPathBuf::from_bytes(b"foo/baz"), normal), None);
        assert_eq!(
            tree.insert(HgPathBuf::from_bytes(b"foo/bap/nested"), normal),
            None
        );

        assert_eq!(tree.len(), 4);

        let results: HashSet<_> = tree.iter().map(|(path, _)| path).collect();
        assert!(results.contains(HgPath::new(b"foo2")));
        assert!(results.contains(HgPath::new(b"foo/bar")));
        assert!(results.contains(HgPath::new(b"foo/baz")));
        assert!(results.contains(HgPath::new(b"foo/bap/nested")));

        // Exactly four items, then `None` forever (the iterator is fused).
        let mut iter = tree.iter();
        assert!(iter.next().is_some());
        assert!(iter.next().is_some());
        assert!(iter.next().is_some());
        assert!(iter.next().is_some());
        assert_eq!(None, iter.next());
        assert_eq!(None, iter.next());
        drop(iter);

        assert_eq!(
            tree.insert(HgPathBuf::from_bytes(b"foo/bap/nested/a"), normal),
            None
        );

        let results: FastHashMap<_, _> = tree.iter().collect();
        assert!(results.contains_key(HgPath::new(b"foo2")));
        assert!(results.contains_key(HgPath::new(b"foo/bar")));
        assert!(results.contains_key(HgPath::new(b"foo/baz")));
        // Is a dir but `was_file`, so it's listed as a removed file
        assert!(results.contains_key(HgPath::new(b"foo/bap/nested")));
        assert!(results.contains_key(HgPath::new(b"foo/bap/nested/a")));

        // insert removed file (now directory) after nested file
        assert_eq!(tree.insert(HgPathBuf::from_bytes(b"a/a"), normal), None);

        // `insert` returns `None` for a directory
        assert_eq!(
            tree.insert(
                HgPathBuf::from_bytes(b"a"),
                DirstateEntry {
                    state: EntryState::Removed,
                    mode: 0,
                    mtime: 0,
                    size: 0,
                }
            ),
            None
        );

        let results: FastHashMap<_, _> = tree.iter().collect();
        assert!(results.contains_key(HgPath::new(b"a")));
        assert!(results.contains_key(HgPath::new(b"a/a")));
    }
}
@@ -0,0 +1,377 b''
1 // node.rs
2 //
3 // Copyright 2020, Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 use super::iter::Iter;
9 use crate::utils::hg_path::HgPathBuf;
10 use crate::{DirstateEntry, EntryState, FastHashMap};
11
12 /// Represents a filesystem directory in the dirstate tree
13 #[derive(Debug, Default, Clone, PartialEq)]
14 pub struct Directory {
15 /// Contains the old file information if it existed between changesets.
16 /// Happens if a file `foo` is marked as removed, removed from the
17 /// filesystem then a directory `foo` is created and at least one of its
18 /// descendents is added to Mercurial.
19 pub(super) was_file: Option<Box<File>>,
20 pub(super) children: FastHashMap<Vec<u8>, Node>,
21 }
22
23 /// Represents a filesystem file (or symlink) in the dirstate tree
24 #[derive(Debug, Clone, PartialEq)]
25 pub struct File {
26 /// Contains the old structure if it existed between changesets.
27 /// Happens all descendents of `foo` marked as removed and removed from
28 /// the filesystem, then a file `foo` is created and added to Mercurial.
29 pub(super) was_directory: Option<Box<Directory>>,
30 pub(super) entry: DirstateEntry,
31 }
32
33 #[derive(Debug, Clone, PartialEq)]
34 pub enum NodeKind {
35 Directory(Directory),
36 File(File),
37 }
38
39 #[derive(Debug, Default, Clone, PartialEq)]
40 pub struct Node {
41 pub kind: NodeKind,
42 }
43
44 impl Default for NodeKind {
45 fn default() -> Self {
46 NodeKind::Directory(Default::default())
47 }
48 }
49
50 impl Node {
51 pub fn insert(&mut self, path: &[u8], new_entry: DirstateEntry) -> InsertResult {
52 let mut split = path.splitn(2, |&c| c == b'/');
53 let head = split.next().unwrap_or(b"");
54 let tail = split.next().unwrap_or(b"");
55
56 if let NodeKind::File(file) = &mut self.kind {
57 if tail.is_empty() && head.is_empty() {
58 // We're modifying the current file
59 let new = Self {
60 kind: NodeKind::File(File {
61 entry: new_entry,
62 ..file.clone()
63 }),
64 };
65 return InsertResult {
66 did_insert: false,
67 old_entry: Some(std::mem::replace(self, new)),
68 };
69 } else {
70 match file.entry.state {
71 // Only replace the current file with a directory if it's
72 // marked as `Removed`
73 EntryState::Removed => {
74 self.kind = NodeKind::Directory(Directory {
75 was_file: Some(Box::from(file.clone())),
76 children: Default::default(),
77 })
78 }
79 _ => return Node::insert_in_file(file, new_entry, head, tail),
80 }
81 }
82 }
83
84 match &mut self.kind {
85 NodeKind::Directory(directory) => {
86 return Node::insert_in_directory(directory, new_entry, head, tail);
87 }
88 NodeKind::File(_) => unreachable!("The file case has already been handled"),
89 }
90 }
91
92 /// The current file still exists and is not marked as `Removed`.
93 /// Insert the entry in its `was_directory`.
94 fn insert_in_file(
95 file: &mut File,
96 new_entry: DirstateEntry,
97 head: &[u8],
98 tail: &[u8],
99 ) -> InsertResult {
100 if let Some(d) = &mut file.was_directory {
101 Node::insert_in_directory(d, new_entry, head, tail)
102 } else {
103 let mut dir = Directory {
104 was_file: None,
105 children: FastHashMap::default(),
106 };
107 let res = Node::insert_in_directory(&mut dir, new_entry, head, tail);
108 file.was_directory = Some(Box::new(dir));
109 res
110 }
111 }
112
113 /// Insert an entry in the subtree of `directory`
114 fn insert_in_directory(
115 directory: &mut Directory,
116 new_entry: DirstateEntry,
117 head: &[u8],
118 tail: &[u8],
119 ) -> InsertResult {
120 let mut res = InsertResult::default();
121
122 if let Some(node) = directory.children.get_mut(head) {
123 // Node exists
124 match &mut node.kind {
125 NodeKind::Directory(subdir) => {
126 if tail.is_empty() {
127 let becomes_file = Self {
128 kind: NodeKind::File(File {
129 was_directory: Some(Box::from(subdir.clone())),
130 entry: new_entry,
131 }),
132 };
133 let old_entry = directory.children.insert(head.to_owned(), becomes_file);
134 return InsertResult {
135 did_insert: true,
136 old_entry,
137 };
138 } else {
139 res = node.insert(tail, new_entry);
140 }
141 }
142 NodeKind::File(_) => {
143 res = node.insert(tail, new_entry);
144 }
145 }
146 } else if tail.is_empty() {
147 // File does not already exist
148 directory.children.insert(
149 head.to_owned(),
150 Self {
151 kind: NodeKind::File(File {
152 was_directory: None,
153 entry: new_entry,
154 }),
155 },
156 );
157 res.did_insert = true;
158 } else {
159 // Directory does not already exist
160 let mut nested = Self {
161 kind: NodeKind::Directory(Directory {
162 was_file: None,
163 children: Default::default(),
164 }),
165 };
166 res = nested.insert(tail, new_entry);
167 directory.children.insert(head.to_owned(), nested);
168 }
169 res
170 }
171
172 /// Removes an entry from the tree, returns a `RemoveResult`.
173 pub fn remove(&mut self, path: &[u8]) -> RemoveResult {
174 let empty_result = RemoveResult::default();
175 if path.is_empty() {
176 return empty_result;
177 }
178 let mut split = path.splitn(2, |&c| c == b'/');
179 let head = split.next();
180 let tail = split.next().unwrap_or(b"");
181
182 let head = match head {
183 None => {
184 return empty_result;
185 }
186 Some(h) => h,
187 };
188 if head == path {
189 match &mut self.kind {
190 NodeKind::Directory(d) => {
191 return Node::remove_from_directory(head, d);
192 }
193 NodeKind::File(f) => {
194 if let Some(d) = &mut f.was_directory {
195 let RemoveResult { old_entry, .. } = Node::remove_from_directory(head, d);
196 return RemoveResult {
197 cleanup: false,
198 old_entry,
199 };
200 }
201 }
202 }
203 empty_result
204 } else {
205 // Look into the dirs
206 match &mut self.kind {
207 NodeKind::Directory(d) => {
208 if let Some(child) = d.children.get_mut(head) {
209 let mut res = child.remove(tail);
210 if res.cleanup {
211 d.children.remove(head);
212 }
213 res.cleanup = d.children.len() == 0 && d.was_file.is_none();
214 res
215 } else {
216 empty_result
217 }
218 }
219 NodeKind::File(f) => {
220 if let Some(d) = &mut f.was_directory {
221 if let Some(child) = d.children.get_mut(head) {
222 let RemoveResult { cleanup, old_entry } = child.remove(tail);
223 if cleanup {
224 d.children.remove(head);
225 }
226 if d.children.len() == 0 && d.was_file.is_none() {
227 f.was_directory = None;
228 }
229
230 return RemoveResult {
231 cleanup: false,
232 old_entry,
233 };
234 }
235 }
236 empty_result
237 }
238 }
239 }
240 }
241
242 fn remove_from_directory(head: &[u8], d: &mut Directory) -> RemoveResult {
243 if let Some(node) = d.children.get_mut(head) {
244 return match &mut node.kind {
245 NodeKind::Directory(d) => {
246 if let Some(f) = &mut d.was_file {
247 let entry = f.entry;
248 d.was_file = None;
249 RemoveResult {
250 cleanup: false,
251 old_entry: Some(entry),
252 }
253 } else {
254 RemoveResult::default()
255 }
256 }
257 NodeKind::File(f) => {
258 let entry = f.entry;
259 let mut cleanup = false;
260 match &f.was_directory {
261 None => {
262 if d.children.len() == 1 {
263 cleanup = true;
264 }
265 d.children.remove(head);
266 }
267 Some(dir) => {
268 node.kind = NodeKind::Directory(*dir.clone());
269 }
270 }
271
272 RemoveResult {
273 cleanup: cleanup,
274 old_entry: Some(entry),
275 }
276 }
277 };
278 }
279 RemoveResult::default()
280 }
281
282 pub fn get(&self, path: &[u8]) -> Option<&Node> {
283 if path.is_empty() {
284 return Some(&self);
285 }
286 let mut split = path.splitn(2, |&c| c == b'/');
287 let head = split.next();
288 let tail = split.next().unwrap_or(b"");
289
290 let head = match head {
291 None => {
292 return Some(&self);
293 }
294 Some(h) => h,
295 };
296 match &self.kind {
297 NodeKind::Directory(d) => {
298 if let Some(child) = d.children.get(head) {
299 return child.get(tail);
300 }
301 }
302 NodeKind::File(f) => {
303 if let Some(d) = &f.was_directory {
304 if let Some(child) = d.children.get(head) {
305 return child.get(tail);
306 }
307 }
308 }
309 }
310
311 None
312 }
313
314 pub fn get_mut(&mut self, path: &[u8]) -> Option<&mut NodeKind> {
315 if path.is_empty() {
316 return Some(&mut self.kind);
317 }
318 let mut split = path.splitn(2, |&c| c == b'/');
319 let head = split.next();
320 let tail = split.next().unwrap_or(b"");
321
322 let head = match head {
323 None => {
324 return Some(&mut self.kind);
325 }
326 Some(h) => h,
327 };
328 match &mut self.kind {
329 NodeKind::Directory(d) => {
330 if let Some(child) = d.children.get_mut(head) {
331 return child.get_mut(tail);
332 }
333 }
334 NodeKind::File(f) => {
335 if let Some(d) = &mut f.was_directory {
336 if let Some(child) = d.children.get_mut(head) {
337 return child.get_mut(tail);
338 }
339 }
340 }
341 }
342
343 None
344 }
345
346 pub fn iter(&self) -> Iter {
347 Iter::new(self)
348 }
349 }
350
351 /// Information returned to the caller of an `insert` operation for integrity.
352 #[derive(Debug, Default)]
353 pub struct InsertResult {
354 /// Whether the insertion resulted in an actual insertion and not an
355 /// update
356 pub(super) did_insert: bool,
357 /// The entry that was replaced, if it exists
358 pub(super) old_entry: Option<Node>,
359 }
360
361 /// Information returned to the caller of a `remove` operation integrity.
362 #[derive(Debug, Default)]
363 pub struct RemoveResult {
364 /// If the caller needs to remove the current node
365 pub(super) cleanup: bool,
366 /// The entry that was replaced, if it exists
367 pub(super) old_entry: Option<DirstateEntry>,
368 }
369
370 impl<'a> IntoIterator for &'a Node {
371 type Item = (HgPathBuf, DirstateEntry);
372 type IntoIter = Iter<'a>;
373
374 fn into_iter(self) -> Self::IntoIter {
375 self.iter()
376 }
377 }
This diff has been collapsed as it changes many lines, (661 lines changed) Show them Hide them
@@ -0,0 +1,661 b''
1 // tree.rs
2 //
3 // Copyright 2020, Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 use super::iter::Iter;
9 use super::node::{Directory, Node, NodeKind};
10 use crate::dirstate::dirstate_tree::iter::FsIter;
11 use crate::dirstate::dirstate_tree::node::{InsertResult, RemoveResult};
12 use crate::utils::hg_path::{HgPath, HgPathBuf};
13 use crate::DirstateEntry;
14 use std::path::PathBuf;
15
16 /// A specialized tree to represent the Mercurial dirstate.
17 ///
18 /// # Advantages over a flat structure
19 ///
20 /// The dirstate is inherently hierarchical, since it's a representation of the
21 /// file structure of the project. The current dirstate format is flat, and
22 /// while that affords us potentially great (unordered) iteration speeds, the
23 /// need to retrieve a given path is great enough that you need some kind of
24 /// hashmap or tree in a lot of cases anyway.
25 ///
26 /// Going with a tree allows us to be smarter:
27 /// - Skipping an ignored directory means we don't visit its entire subtree
28 /// - Security auditing does not need to reconstruct paths backwards to check
29 /// for symlinked directories, this can be done during the iteration in a
30 /// very efficient fashion
31 /// - We don't need to build the directory information in another struct,
32 /// simplifying the code a lot, reducing the memory footprint and
33 /// potentially going faster depending on the implementation.
34 /// - We can use it to store a (platform-dependent) caching mechanism [1]
35 /// - And probably other types of optimizations.
36 ///
37 /// Only the first two items in this list are implemented as of this commit.
38 ///
39 /// [1]: https://www.mercurial-scm.org/wiki/DirsCachePlan
40 ///
41 ///
42 /// # Structure
43 ///
44 /// It's a prefix (radix) tree with no fixed arity, with a granularity of a
45 /// folder, allowing it to mimic a filesystem hierarchy:
46 ///
47 /// ```text
48 /// foo/bar
49 /// foo/baz
50 /// test
51 /// ```
52 /// Will be represented (simplified) by:
53 ///
54 /// ```text
55 /// Directory(root):
56 /// - File("test")
57 /// - Directory("foo"):
58 /// - File("bar")
59 /// - File("baz")
60 /// ```
61 ///
62 /// Moreover, it is special-cased for storing the dirstate and as such handles
63 /// cases that a simple `HashMap` would handle, but while preserving the
64 /// hierarchy.
65 /// For example:
66 ///
67 /// ```shell
68 /// $ touch foo
69 /// $ hg add foo
70 /// $ hg commit -m "foo"
71 /// $ hg remove foo
72 /// $ rm foo
73 /// $ mkdir foo
74 /// $ touch foo/a
75 /// $ hg add foo/a
76 /// $ hg status
77 /// R foo
78 /// A foo/a
79 /// ```
80 /// To represent this in a tree, one needs to keep track of whether any given
81 /// file was a directory and whether any given directory was a file at the last
82 /// dirstate update. This tree stores that information, but only in the right
83 /// circumstances by respecting the high-level rules that prevent nonsensical
84 /// structures to exist:
85 /// - a file can only be added as a child of another file if the latter is
86 /// marked as `Removed`
87 /// - a file cannot replace a folder unless all its descendents are removed
88 ///
89 /// This second rule is not checked by the tree for performance reasons, and
90 /// because high-level logic already prevents that state from happening.
91 ///
92 /// # Ordering
93 ///
94 /// It makes no guarantee of ordering for now.
95 #[derive(Debug, Default, Clone, PartialEq)]
96 pub struct Tree {
97 pub root: Node,
98 files_count: usize,
99 }
100
101 impl Tree {
102 pub fn new() -> Self {
103 Self {
104 root: Node {
105 kind: NodeKind::Directory(Directory {
106 was_file: None,
107 children: Default::default(),
108 }),
109 },
110 files_count: 0,
111 }
112 }
113
114 /// How many files (not directories) are stored in the tree, including ones
115 /// marked as `Removed`.
116 pub fn len(&self) -> usize {
117 self.files_count
118 }
119
120 pub fn is_empty(&self) -> bool {
121 self.len() == 0
122 }
123
124 /// Inserts a file in the tree and returns the previous entry if any.
125 pub fn insert(
126 &mut self,
127 path: impl AsRef<HgPath>,
128 kind: DirstateEntry,
129 ) -> Option<DirstateEntry> {
130 let old = self.insert_node(path, kind);
131 match old?.kind {
132 NodeKind::Directory(_) => None,
133 NodeKind::File(f) => Some(f.entry),
134 }
135 }
136
137 /// Low-level insertion method that returns the previous node (directories
138 /// included).
139 fn insert_node(&mut self, path: impl AsRef<HgPath>, kind: DirstateEntry) -> Option<Node> {
140 let InsertResult {
141 did_insert,
142 old_entry,
143 } = self.root.insert(path.as_ref().as_bytes(), kind);
144 self.files_count += if did_insert { 1 } else { 0 };
145 old_entry
146 }
147
148 /// Returns a reference to a node if it exists.
149 pub fn get_node(&self, path: impl AsRef<HgPath>) -> Option<&Node> {
150 self.root.get(path.as_ref().as_bytes())
151 }
152
153 /// Returns a reference to the entry corresponding to `path` if it exists.
154 pub fn get(&self, path: impl AsRef<HgPath>) -> Option<&DirstateEntry> {
155 if let Some(node) = self.get_node(&path) {
156 return match &node.kind {
157 NodeKind::Directory(d) => d.was_file.as_ref().map(|f| &f.entry),
158 NodeKind::File(f) => Some(&f.entry),
159 };
160 }
161 None
162 }
163
164 /// Returns `true` if an entry is found for the given `path`.
165 pub fn contains_key(&self, path: impl AsRef<HgPath>) -> bool {
166 self.get(path).is_some()
167 }
168
169 /// Returns a mutable reference to the entry corresponding to `path` if it
170 /// exists.
171 pub fn get_mut(&mut self, path: impl AsRef<HgPath>) -> Option<&mut DirstateEntry> {
172 if let Some(kind) = self.root.get_mut(path.as_ref().as_bytes()) {
173 return match kind {
174 NodeKind::Directory(d) => d.was_file.as_mut().map(|f| &mut f.entry),
175 NodeKind::File(f) => Some(&mut f.entry),
176 };
177 }
178 None
179 }
180
181 /// Returns an iterator over the paths and corresponding entries in the
182 /// tree.
183 pub fn iter(&self) -> Iter {
184 Iter::new(&self.root)
185 }
186
187 /// Returns an iterator of all entries in the tree, with a special
188 /// filesystem handling for the directories containing said entries. See
189 /// the documentation of `FsIter` for more.
190 pub fn fs_iter(&self, root_dir: PathBuf) -> FsIter {
191 FsIter::new(&self.root, root_dir)
192 }
193
194 /// Remove the entry at `path` and returns it, if it exists.
195 pub fn remove(&mut self, path: impl AsRef<HgPath>) -> Option<DirstateEntry> {
196 let RemoveResult { old_entry, .. } = self.root.remove(path.as_ref().as_bytes());
197 self.files_count = self
198 .files_count
199 .checked_sub(if old_entry.is_some() { 1 } else { 0 })
200 .expect("removed too many files");
201 old_entry
202 }
203 }
204
205 impl<P: AsRef<HgPath>> Extend<(P, DirstateEntry)> for Tree {
206 fn extend<T: IntoIterator<Item = (P, DirstateEntry)>>(&mut self, iter: T) {
207 for (path, entry) in iter {
208 self.insert(path, entry);
209 }
210 }
211 }
212
213 impl<'a> IntoIterator for &'a Tree {
214 type Item = (HgPathBuf, DirstateEntry);
215 type IntoIter = Iter<'a>;
216
217 fn into_iter(self) -> Self::IntoIter {
218 self.iter()
219 }
220 }
221
222 #[cfg(test)]
223 mod tests {
224 use super::*;
225 use crate::dirstate::dirstate_tree::node::File;
226 use crate::{EntryState, FastHashMap};
227 use pretty_assertions::assert_eq;
228
229 impl Node {
230 /// Shortcut for getting children of a node in tests.
231 fn children(&self) -> Option<&FastHashMap<Vec<u8>, Node>> {
232 match &self.kind {
233 NodeKind::Directory(d) => Some(&d.children),
234 NodeKind::File(_) => None,
235 }
236 }
237 }
238
239 #[test]
240 fn test_dirstate_tree() {
241 let mut tree = Tree::new();
242
243 assert_eq!(
244 tree.insert_node(
245 HgPath::new(b"we/p"),
246 DirstateEntry {
247 state: EntryState::Normal,
248 mode: 0,
249 mtime: 0,
250 size: 0
251 }
252 ),
253 None
254 );
255 dbg!(&tree);
256 assert!(tree.get_node(HgPath::new(b"we")).is_some());
257 let entry = DirstateEntry {
258 state: EntryState::Merged,
259 mode: 41,
260 mtime: 42,
261 size: 43,
262 };
263 assert_eq!(tree.insert_node(HgPath::new(b"foo/bar"), entry), None);
264 assert_eq!(
265 tree.get_node(HgPath::new(b"foo/bar")),
266 Some(&Node {
267 kind: NodeKind::File(File {
268 was_directory: None,
269 entry
270 })
271 })
272 );
273 // We didn't override the first entry we made
274 assert!(tree.get_node(HgPath::new(b"we")).is_some(),);
275 // Inserting the same key again
276 assert_eq!(
277 tree.insert_node(HgPath::new(b"foo/bar"), entry),
278 Some(Node {
279 kind: NodeKind::File(File {
280 was_directory: None,
281 entry
282 }),
283 })
284 );
285 // Inserting the two levels deep
286 assert_eq!(tree.insert_node(HgPath::new(b"foo/bar/baz"), entry), None);
287 // Getting a file "inside a file" should return `None`
288 assert_eq!(tree.get_node(HgPath::new(b"foo/bar/baz/bap"),), None);
289
290 assert_eq!(
291 tree.insert_node(HgPath::new(b"wasdir/subfile"), entry),
292 None,
293 );
294 let removed_entry = DirstateEntry {
295 state: EntryState::Removed,
296 mode: 0,
297 mtime: 0,
298 size: 0,
299 };
300 assert!(tree
301 .insert_node(HgPath::new(b"wasdir"), removed_entry)
302 .is_some());
303
304 assert_eq!(
305 tree.get_node(HgPath::new(b"wasdir")),
306 Some(&Node {
307 kind: NodeKind::File(File {
308 was_directory: Some(Box::new(Directory {
309 was_file: None,
310 children: [(
311 b"subfile".to_vec(),
312 Node {
313 kind: NodeKind::File(File {
314 was_directory: None,
315 entry,
316 })
317 }
318 )]
319 .to_vec()
320 .into_iter()
321 .collect()
322 })),
323 entry: removed_entry
324 })
325 })
326 );
327
328 assert!(tree.get(HgPath::new(b"wasdir/subfile")).is_some())
329 }
330
331 #[test]
332 fn test_insert_removed() {
333 let mut tree = Tree::new();
334 let entry = DirstateEntry {
335 state: EntryState::Merged,
336 mode: 1,
337 mtime: 2,
338 size: 3,
339 };
340 let removed_entry = DirstateEntry {
341 state: EntryState::Removed,
342 mode: 10,
343 mtime: 20,
344 size: 30,
345 };
346 assert_eq!(tree.insert_node(HgPath::new(b"foo"), entry), None);
347 assert_eq!(tree.insert_node(HgPath::new(b"foo/a"), removed_entry), None);
348 // The insert should not turn `foo` into a directory as `foo` is not
349 // `Removed`.
350 match tree.get_node(HgPath::new(b"foo")).unwrap().kind {
351 NodeKind::Directory(_) => panic!("should be a file"),
352 NodeKind::File(_) => {}
353 }
354
355 let mut tree = Tree::new();
356 let entry = DirstateEntry {
357 state: EntryState::Merged,
358 mode: 1,
359 mtime: 2,
360 size: 3,
361 };
362 let removed_entry = DirstateEntry {
363 state: EntryState::Removed,
364 mode: 10,
365 mtime: 20,
366 size: 30,
367 };
368 // The insert *should* turn `foo` into a directory as it is `Removed`.
369 assert_eq!(tree.insert_node(HgPath::new(b"foo"), removed_entry), None);
370 assert_eq!(tree.insert_node(HgPath::new(b"foo/a"), entry), None);
371 match tree.get_node(HgPath::new(b"foo")).unwrap().kind {
372 NodeKind::Directory(_) => {}
373 NodeKind::File(_) => panic!("should be a directory"),
374 }
375 }
376
#[test]
fn test_get() {
    // Fixtures shared by both scenarios below.
    let merged = DirstateEntry {
        state: EntryState::Merged,
        mode: 1,
        mtime: 2,
        size: 3,
    };
    let removed = DirstateEntry {
        state: EntryState::Removed,
        mode: 0,
        mtime: 5,
        size: 1,
    };

    // Only the exact file path resolves to an entry; its ancestors and
    // would-be descendants do not.
    let mut tree = Tree::new();
    assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), merged), None);
    assert_eq!(tree.files_count, 1);
    assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&merged));
    assert_eq!(tree.get(HgPath::new(b"a/b")), None);
    assert_eq!(tree.get(HgPath::new(b"a")), None);
    assert_eq!(tree.get(HgPath::new(b"a/b/c/d")), None);

    // was_directory: inserting an entry at the existing directory `a/b`
    // keeps the nested file reachable.
    assert_eq!(tree.insert(HgPath::new(b"a/b"), removed), None);
    assert_eq!(tree.files_count, 2);
    assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&removed));
    assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&merged));

    // was_file: inserting below the existing file `a`.
    let mut tree = Tree::new();
    assert_eq!(tree.insert_node(HgPath::new(b"a"), merged), None);
    assert_eq!(tree.files_count, 1);
    assert_eq!(tree.insert_node(HgPath::new(b"a/b"), removed), None);
    assert_eq!(tree.files_count, 2);
    assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&removed));
}
413
#[test]
fn test_get_mut() {
    // Fixtures shared by both scenarios below; they are `mut` only so that
    // `&mut` references to them can be compared against `get_mut` results.
    let mut merged = DirstateEntry {
        state: EntryState::Merged,
        mode: 1,
        mtime: 2,
        size: 3,
    };
    let mut removed = DirstateEntry {
        state: EntryState::Removed,
        mode: 0,
        mtime: 5,
        size: 1,
    };

    // Only the exact file path resolves to an entry; its ancestors and
    // would-be descendants do not.
    let mut tree = Tree::new();
    assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), merged), None);
    assert_eq!(tree.files_count, 1);
    assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut merged));
    assert_eq!(tree.get_mut(HgPath::new(b"a/b")), None);
    assert_eq!(tree.get_mut(HgPath::new(b"a")), None);
    assert_eq!(tree.get_mut(HgPath::new(b"a/b/c/d")), None);

    // was_directory: inserting an entry at the existing directory `a/b`
    // keeps the nested file reachable.
    assert_eq!(tree.insert(HgPath::new(b"a/b"), removed), None);
    assert_eq!(tree.files_count, 2);
    assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut removed));
    assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut merged));

    // was_file: inserting below the existing file `a`.
    let mut tree = Tree::new();
    assert_eq!(tree.insert_node(HgPath::new(b"a"), merged), None);
    assert_eq!(tree.files_count, 1);
    assert_eq!(tree.insert_node(HgPath::new(b"a/b"), removed), None);
    assert_eq!(tree.files_count, 2);
    assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut removed));
}
450
#[test]
fn test_remove() {
    // Exercises `Tree::remove`: the returned entry, the `files_count`
    // bookkeeping and the cleanup of emptied directories, including the
    // cases where a path is both a file and a directory.
    let mut tree = Tree::new();

    // Removing from an empty tree is a no-op.
    assert_eq!(tree.files_count, 0);
    assert_eq!(tree.remove(HgPath::new(b"foo")), None);
    assert_eq!(tree.files_count, 0);

    let entry = DirstateEntry {
        state: EntryState::Normal,
        mode: 0,
        mtime: 0,
        size: 0,
    };
    assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
    assert_eq!(tree.files_count, 1);

    assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry));
    assert_eq!(tree.files_count, 0);

    assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None);
    assert_eq!(tree.insert_node(HgPath::new(b"a/b/y"), entry), None);
    assert_eq!(tree.insert_node(HgPath::new(b"a/b/z"), entry), None);
    assert_eq!(tree.insert_node(HgPath::new(b"x"), entry), None);
    assert_eq!(tree.insert_node(HgPath::new(b"y"), entry), None);
    assert_eq!(tree.files_count, 5);

    assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry));
    assert_eq!(tree.files_count, 4);
    // Removing the same path a second time finds nothing and must not
    // change the count.
    assert_eq!(tree.remove(HgPath::new(b"a/b/x")), None);
    assert_eq!(tree.files_count, 4);
    assert_eq!(tree.remove(HgPath::new(b"a/b/y")), Some(entry));
    assert_eq!(tree.files_count, 3);
    assert_eq!(tree.remove(HgPath::new(b"a/b/z")), Some(entry));
    assert_eq!(tree.files_count, 2);

    assert_eq!(tree.remove(HgPath::new(b"x")), Some(entry));
    assert_eq!(tree.files_count, 1);
    assert_eq!(tree.remove(HgPath::new(b"y")), Some(entry));
    assert_eq!(tree.files_count, 0);

    // `a` should have been cleaned up, no more files anywhere in its
    // descendants
    assert_eq!(tree.get_node(HgPath::new(b"a")), None);
    assert_eq!(tree.root.children().unwrap().len(), 0);

    let removed_entry = DirstateEntry {
        state: EntryState::Removed,
        ..entry
    };

    // `a` is a `Removed` file and also has a file nested below it.
    assert_eq!(tree.insert(HgPath::new(b"a"), removed_entry), None);
    assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None);
    assert_eq!(tree.files_count, 2);
    assert_eq!(tree.remove(HgPath::new(b"a")), Some(removed_entry));
    assert_eq!(tree.files_count, 1);
    assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry));
    assert_eq!(tree.files_count, 0);

    // The entire tree should have been cleaned up, no more files anywhere
    // in its descendants
    assert_eq!(tree.root.children().unwrap().len(), 0);

    // Same scenario with the states swapped: `a` is `Normal`, the nested
    // file is `Removed`.
    assert_eq!(tree.insert(HgPath::new(b"a"), entry), None);
    assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), removed_entry), None);
    assert_eq!(tree.files_count, 2);
    assert_eq!(tree.remove(HgPath::new(b"a")), Some(entry));
    assert_eq!(tree.files_count, 1);
    assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(removed_entry));
    assert_eq!(tree.files_count, 0);

    // The entire tree should have been cleaned up, no more files anywhere
    // in its descendants
    assert_eq!(tree.root.children().unwrap().len(), 0);

    assert_eq!(tree.insert(HgPath::new(b"d"), entry), None);
    assert_eq!(tree.insert(HgPath::new(b"d/d/d"), entry), None);
    assert_eq!(tree.files_count, 2);

    // Deleting the nested file should not delete the top directory as it
    // used to be a file
    assert_eq!(tree.remove(HgPath::new(b"d/d/d")), Some(entry));
    assert_eq!(tree.files_count, 1);
    assert!(tree.get_node(HgPath::new(b"d")).is_some());
    assert!(tree.remove(HgPath::new(b"d")).is_some());
    assert_eq!(tree.files_count, 0);

    // Deleting the nested file should not delete the top file (other way
    // around from the last case)
    assert_eq!(tree.insert(HgPath::new(b"a/a"), entry), None);
    assert_eq!(tree.files_count, 1);
    assert_eq!(tree.insert(HgPath::new(b"a"), entry), None);
    assert_eq!(tree.files_count, 2);
    assert_eq!(tree.remove(HgPath::new(b"a/a")), Some(entry));
    assert_eq!(tree.files_count, 1);
    assert!(tree.get_node(HgPath::new(b"a")).is_some());
    assert!(tree.get_node(HgPath::new(b"a/a")).is_none());
}
558
#[test]
fn test_was_directory() {
    // Inserting a file at a path that currently is a directory must keep
    // the former directory reachable through `was_directory`, and drop it
    // again once its last file has been removed.
    let mut tree = Tree::new();

    let entry = DirstateEntry {
        state: EntryState::Removed,
        mode: 0,
        mtime: 0,
        size: 0,
    };
    assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
    assert_eq!(tree.files_count, 1);

    // `a` already exists as a directory, so this insert returns the
    // previous node.
    assert!(tree.insert_node(HgPath::new(b"a"), entry).is_some());
    let new_a = tree.root.children().unwrap().get(&b"a".to_vec()).unwrap();

    match &new_a.kind {
        NodeKind::Directory(_) => panic!("`a` should now be a file"),
        NodeKind::File(f) => {
            // Borrow the former directory instead of cloning the whole
            // subtree; it is only inspected here.
            let dir = f.was_directory.as_ref().unwrap();
            let c = dir
                .children
                .get(&b"b".to_vec())
                .unwrap()
                .children()
                .unwrap()
                .get(&b"c".to_vec())
                .unwrap();

            assert_eq!(
                match &c.kind {
                    NodeKind::Directory(_) => {
                        panic!("`a/b/c` should be a file")
                    }
                    NodeKind::File(f) => f.entry,
                },
                entry
            );
        }
    }
    assert_eq!(tree.files_count, 2);
    assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry));
    assert_eq!(tree.files_count, 1);
    let a = tree.get_node(HgPath::new(b"a")).unwrap();
    match &a.kind {
        NodeKind::Directory(_) => panic!("`a` should still be a file"),
        NodeKind::File(f) => {
            // Directory in `was_directory` was emptied, should be removed
            assert_eq!(f.was_directory, None);
        }
    }
}
#[test]
fn test_extend() {
    // `Tree::extend` must make every inserted path retrievable and count
    // each of them as a file, including `d/d/d` nested below the file `d`.
    let insertions = vec![
        (
            HgPathBuf::from_bytes(b"d"),
            DirstateEntry {
                state: EntryState::Added,
                mode: 0,
                mtime: -1,
                size: -1,
            },
        ),
        (
            HgPathBuf::from_bytes(b"b"),
            DirstateEntry {
                state: EntryState::Normal,
                mode: 33188,
                mtime: 1599647984,
                size: 2,
            },
        ),
        (
            HgPathBuf::from_bytes(b"a/a"),
            DirstateEntry {
                state: EntryState::Normal,
                mode: 33188,
                mtime: 1599647984,
                size: 2,
            },
        ),
        (
            HgPathBuf::from_bytes(b"d/d/d"),
            DirstateEntry {
                state: EntryState::Removed,
                mode: 0,
                mtime: 0,
                size: 0,
            },
        ),
    ];
    let mut tree = Tree::new();

    // The list is cloned because it is iterated again below to check that
    // every path made it into the tree.
    tree.extend(insertions.clone());

    for (path, _) in &insertions {
        // A second argument to `assert!` is the panic message, not an
        // expected value, so only the condition is passed.
        assert!(tree.contains_key(path));
    }
    assert_eq!(tree.files_count, 4);
}
661 }
@@ -11,6 +11,7 b' use std::convert::TryFrom;'
11
11
12 pub mod dirs_multiset;
12 pub mod dirs_multiset;
13 pub mod dirstate_map;
13 pub mod dirstate_map;
14 pub mod dirstate_tree;
14 pub mod parsers;
15 pub mod parsers;
15 pub mod status;
16 pub mod status;
16
17
General Comments 0
You need to be logged in to leave comments. Login now