dirstate-tree: Downgrade `&mut Node` to `&Node` in status and serialization...
Simon Sapin
r48122:0252600f default
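As the diff below shows, helpers that only read the dirstate tree (`Node::sorted` in the `dirstate_map` module and `write_nodes` in the `on_disk` serialization) now take shared `&` borrows and iterate with `iter()` rather than `iter_mut()`. A minimal sketch of the underlying point, using simplified stand-in types rather than the real hg-core definitions:

```rust
use std::collections::HashMap;

// Hypothetical stand-ins for hg-core's `Node` / `ChildNodes`, for illustration only.
#[derive(Default)]
struct Node {
    entry: Option<u32>,
    children: HashMap<String, Node>,
}

// Serialization and status only *read* the tree, so a shared borrow is enough;
// requiring `&mut` here would needlessly restrict callers.
fn count_entries(nodes: &HashMap<String, Node>) -> usize {
    nodes
        .values()
        .map(|node| node.entry.iter().count() + count_entries(&node.children))
        .sum()
}

fn main() {
    let mut root: HashMap<String, Node> = HashMap::new();
    let mut dir = Node::default();
    dir.children.insert(
        "file".to_owned(),
        Node { entry: Some(0), children: HashMap::new() },
    );
    root.insert("dir".to_owned(), dir);
    assert_eq!(count_entries(&root), 1);
}
```

Downgrading to `&` lets read-only walks like this coexist with other shared borrows of the map.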
@@ -1,639 +1,639 b''
1 use bytes_cast::BytesCast;
1 use bytes_cast::BytesCast;
2 use micro_timer::timed;
2 use micro_timer::timed;
3 use std::borrow::Cow;
3 use std::borrow::Cow;
4 use std::convert::TryInto;
4 use std::convert::TryInto;
5 use std::path::PathBuf;
5 use std::path::PathBuf;
6
6
7 use super::on_disk;
7 use super::on_disk;
8 use super::path_with_basename::WithBasename;
8 use super::path_with_basename::WithBasename;
9 use crate::dirstate::parsers::pack_entry;
9 use crate::dirstate::parsers::pack_entry;
10 use crate::dirstate::parsers::packed_entry_size;
10 use crate::dirstate::parsers::packed_entry_size;
11 use crate::dirstate::parsers::parse_dirstate_entries;
11 use crate::dirstate::parsers::parse_dirstate_entries;
12 use crate::dirstate::parsers::Timestamp;
12 use crate::dirstate::parsers::Timestamp;
13 use crate::matchers::Matcher;
13 use crate::matchers::Matcher;
14 use crate::utils::hg_path::{HgPath, HgPathBuf};
14 use crate::utils::hg_path::{HgPath, HgPathBuf};
15 use crate::CopyMapIter;
15 use crate::CopyMapIter;
16 use crate::DirstateEntry;
16 use crate::DirstateEntry;
17 use crate::DirstateError;
17 use crate::DirstateError;
18 use crate::DirstateMapError;
18 use crate::DirstateMapError;
19 use crate::DirstateParents;
19 use crate::DirstateParents;
20 use crate::DirstateStatus;
20 use crate::DirstateStatus;
21 use crate::EntryState;
21 use crate::EntryState;
22 use crate::FastHashMap;
22 use crate::FastHashMap;
23 use crate::PatternFileWarning;
23 use crate::PatternFileWarning;
24 use crate::StateMapIter;
24 use crate::StateMapIter;
25 use crate::StatusError;
25 use crate::StatusError;
26 use crate::StatusOptions;
26 use crate::StatusOptions;
27
27
28 pub struct DirstateMap<'on_disk> {
28 pub struct DirstateMap<'on_disk> {
29 /// Contents of the `.hg/dirstate` file
29 /// Contents of the `.hg/dirstate` file
30 pub(super) on_disk: &'on_disk [u8],
30 pub(super) on_disk: &'on_disk [u8],
31
31
32 pub(super) root: ChildNodes<'on_disk>,
32 pub(super) root: ChildNodes<'on_disk>,
33
33
34 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
34 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
35 pub(super) nodes_with_entry_count: u32,
35 pub(super) nodes_with_entry_count: u32,
36
36
37 /// Number of nodes anywhere in the tree that have
37 /// Number of nodes anywhere in the tree that have
38 /// `.copy_source.is_some()`.
38 /// `.copy_source.is_some()`.
39 pub(super) nodes_with_copy_source_count: u32,
39 pub(super) nodes_with_copy_source_count: u32,
40 }
40 }
41
41
42 /// Using a plain `HgPathBuf` of the full path from the repository root as a
42 /// Using a plain `HgPathBuf` of the full path from the repository root as a
43 /// map key would also work: all paths in a given map have the same parent
43 /// map key would also work: all paths in a given map have the same parent
44 /// path, so comparing full paths gives the same result as comparing base
44 /// path, so comparing full paths gives the same result as comparing base
45 /// names. However `HashMap` would waste time always re-hashing the same
45 /// names. However `HashMap` would waste time always re-hashing the same
46 /// string prefix.
46 /// string prefix.
47 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
47 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
48 pub(super) type ChildNodes<'on_disk> =
48 pub(super) type ChildNodes<'on_disk> =
49 FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>;
49 FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>;
50
50
51 /// Represents a file or a directory
51 /// Represents a file or a directory
52 #[derive(Default)]
52 #[derive(Default)]
53 pub(super) struct Node<'on_disk> {
53 pub(super) struct Node<'on_disk> {
54 /// `None` for directories
54 /// `None` for directories
55 pub(super) entry: Option<DirstateEntry>,
55 pub(super) entry: Option<DirstateEntry>,
56
56
57 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
57 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
58
58
59 pub(super) children: ChildNodes<'on_disk>,
59 pub(super) children: ChildNodes<'on_disk>,
60
60
61 /// How many (non-inclusive) descendants of this node are tracked files
61 /// How many (non-inclusive) descendants of this node are tracked files
62 pub(super) tracked_descendants_count: u32,
62 pub(super) tracked_descendants_count: u32,
63 }
63 }
64
64
65 impl<'on_disk> Node<'on_disk> {
65 impl<'on_disk> Node<'on_disk> {
66 pub(super) fn state(&self) -> Option<EntryState> {
66 pub(super) fn state(&self) -> Option<EntryState> {
67 self.entry.as_ref().map(|entry| entry.state)
67 self.entry.as_ref().map(|entry| entry.state)
68 }
68 }
69
69
70 pub(super) fn sorted<'tree>(
70 pub(super) fn sorted<'tree>(
71 nodes: &'tree mut ChildNodes<'on_disk>,
71 nodes: &'tree ChildNodes<'on_disk>,
72 ) -> Vec<(&'tree NodeKey<'on_disk>, &'tree mut Self)> {
72 ) -> Vec<(&'tree NodeKey<'on_disk>, &'tree Self)> {
73 let mut vec: Vec<_> = nodes.iter_mut().collect();
73 let mut vec: Vec<_> = nodes.iter().collect();
74 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
74 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
75 // https://github.com/rust-lang/rust/issues/34162
75 // https://github.com/rust-lang/rust/issues/34162
76 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
76 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
77 vec
77 vec
78 }
78 }
79 }
79 }
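The comment in `sorted` above points at rust-lang/rust#34162: `sort_unstable_by_key` cannot be used when the sort key borrows from the element being sorted. A standalone illustration on toy data (not the real `NodeKey` type):

```rust
fn main() {
    let mut pairs: Vec<(String, u32)> =
        vec![("b/x".to_owned(), 2), ("a/y".to_owned(), 1)];

    // `pairs.sort_unstable_by_key(|(path, _)| path)` does not compile: the key
    // closure would have to return a `&String` borrowed from its argument,
    // which the `FnMut(&T) -> K` signature cannot express (rust-lang/rust#34162).
    // Comparing inside the closure avoids the problem:
    pairs.sort_unstable_by(|(p1, _), (p2, _)| p1.cmp(p2));

    assert_eq!(pairs[0].0, "a/y");
}
```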
80
80
81 impl<'on_disk> DirstateMap<'on_disk> {
81 impl<'on_disk> DirstateMap<'on_disk> {
82 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
82 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
83 Self {
83 Self {
84 on_disk,
84 on_disk,
85 root: ChildNodes::default(),
85 root: ChildNodes::default(),
86 nodes_with_entry_count: 0,
86 nodes_with_entry_count: 0,
87 nodes_with_copy_source_count: 0,
87 nodes_with_copy_source_count: 0,
88 }
88 }
89 }
89 }
90
90
91 #[timed]
91 #[timed]
92 pub fn new_v2(
92 pub fn new_v2(
93 on_disk: &'on_disk [u8],
93 on_disk: &'on_disk [u8],
94 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
94 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
95 on_disk::read(on_disk)
95 on_disk::read(on_disk)
96 }
96 }
97
97
98 #[timed]
98 #[timed]
99 pub fn new_v1(
99 pub fn new_v1(
100 on_disk: &'on_disk [u8],
100 on_disk: &'on_disk [u8],
101 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
101 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
102 let mut map = Self::empty(on_disk);
102 let mut map = Self::empty(on_disk);
103 if map.on_disk.is_empty() {
103 if map.on_disk.is_empty() {
104 return Ok((map, None));
104 return Ok((map, None));
105 }
105 }
106
106
107 let parents = parse_dirstate_entries(
107 let parents = parse_dirstate_entries(
108 map.on_disk,
108 map.on_disk,
109 |path, entry, copy_source| {
109 |path, entry, copy_source| {
110 let tracked = entry.state.is_tracked();
110 let tracked = entry.state.is_tracked();
111 let node = Self::get_or_insert_node(
111 let node = Self::get_or_insert_node(
112 &mut map.root,
112 &mut map.root,
113 path,
113 path,
114 WithBasename::to_cow_borrowed,
114 WithBasename::to_cow_borrowed,
115 |ancestor| {
115 |ancestor| {
116 if tracked {
116 if tracked {
117 ancestor.tracked_descendants_count += 1
117 ancestor.tracked_descendants_count += 1
118 }
118 }
119 },
119 },
120 );
120 );
121 assert!(
121 assert!(
122 node.entry.is_none(),
122 node.entry.is_none(),
123 "duplicate dirstate entry in read"
123 "duplicate dirstate entry in read"
124 );
124 );
125 assert!(
125 assert!(
126 node.copy_source.is_none(),
126 node.copy_source.is_none(),
127 "duplicate dirstate entry in read"
127 "duplicate dirstate entry in read"
128 );
128 );
129 node.entry = Some(*entry);
129 node.entry = Some(*entry);
130 node.copy_source = copy_source.map(Cow::Borrowed);
130 node.copy_source = copy_source.map(Cow::Borrowed);
131 map.nodes_with_entry_count += 1;
131 map.nodes_with_entry_count += 1;
132 if copy_source.is_some() {
132 if copy_source.is_some() {
133 map.nodes_with_copy_source_count += 1
133 map.nodes_with_copy_source_count += 1
134 }
134 }
135 },
135 },
136 )?;
136 )?;
137 let parents = Some(parents.clone());
137 let parents = Some(parents.clone());
138
138
139 Ok((map, parents))
139 Ok((map, parents))
140 }
140 }
141
141
142 fn get_node(&self, path: &HgPath) -> Option<&Node> {
142 fn get_node(&self, path: &HgPath) -> Option<&Node> {
143 let mut children = &self.root;
143 let mut children = &self.root;
144 let mut components = path.components();
144 let mut components = path.components();
145 let mut component =
145 let mut component =
146 components.next().expect("expected at least one component");
146 components.next().expect("expected at least one component");
147 loop {
147 loop {
148 let child = children.get(component)?;
148 let child = children.get(component)?;
149 if let Some(next_component) = components.next() {
149 if let Some(next_component) = components.next() {
150 component = next_component;
150 component = next_component;
151 children = &child.children;
151 children = &child.children;
152 } else {
152 } else {
153 return Some(child);
153 return Some(child);
154 }
154 }
155 }
155 }
156 }
156 }
157
157
158 /// Returns a mutable reference to the node at `path` if it exists
158 /// Returns a mutable reference to the node at `path` if it exists
159 ///
159 ///
160 /// This takes `root` instead of `&mut self` so that callers can mutate
160 /// This takes `root` instead of `&mut self` so that callers can mutate
161 /// other fields while the returned borrow is still valid
161 /// other fields while the returned borrow is still valid
162 fn get_node_mut<'tree>(
162 fn get_node_mut<'tree>(
163 root: &'tree mut ChildNodes<'on_disk>,
163 root: &'tree mut ChildNodes<'on_disk>,
164 path: &HgPath,
164 path: &HgPath,
165 ) -> Option<&'tree mut Node<'on_disk>> {
165 ) -> Option<&'tree mut Node<'on_disk>> {
166 let mut children = root;
166 let mut children = root;
167 let mut components = path.components();
167 let mut components = path.components();
168 let mut component =
168 let mut component =
169 components.next().expect("expected at least one component");
169 components.next().expect("expected at least one component");
170 loop {
170 loop {
171 let child = children.get_mut(component)?;
171 let child = children.get_mut(component)?;
172 if let Some(next_component) = components.next() {
172 if let Some(next_component) = components.next() {
173 component = next_component;
173 component = next_component;
174 children = &mut child.children;
174 children = &mut child.children;
175 } else {
175 } else {
176 return Some(child);
176 return Some(child);
177 }
177 }
178 }
178 }
179 }
179 }
180
180
181 fn get_or_insert_node<'tree, 'path>(
181 fn get_or_insert_node<'tree, 'path>(
182 root: &'tree mut ChildNodes<'on_disk>,
182 root: &'tree mut ChildNodes<'on_disk>,
183 path: &'path HgPath,
183 path: &'path HgPath,
184 to_cow: impl Fn(
184 to_cow: impl Fn(
185 WithBasename<&'path HgPath>,
185 WithBasename<&'path HgPath>,
186 ) -> WithBasename<Cow<'on_disk, HgPath>>,
186 ) -> WithBasename<Cow<'on_disk, HgPath>>,
187 mut each_ancestor: impl FnMut(&mut Node),
187 mut each_ancestor: impl FnMut(&mut Node),
188 ) -> &'tree mut Node<'on_disk> {
188 ) -> &'tree mut Node<'on_disk> {
189 let mut child_nodes = root;
189 let mut child_nodes = root;
190 let mut inclusive_ancestor_paths =
190 let mut inclusive_ancestor_paths =
191 WithBasename::inclusive_ancestors_of(path);
191 WithBasename::inclusive_ancestors_of(path);
192 let mut ancestor_path = inclusive_ancestor_paths
192 let mut ancestor_path = inclusive_ancestor_paths
193 .next()
193 .next()
194 .expect("expected at least one inclusive ancestor");
194 .expect("expected at least one inclusive ancestor");
195 loop {
195 loop {
196 // TODO: can we avoid allocating an owned key in cases where the
196 // TODO: can we avoid allocating an owned key in cases where the
197 // map already contains that key, without introducing double
197 // map already contains that key, without introducing double
198 // lookup?
198 // lookup?
199 let child_node =
199 let child_node =
200 child_nodes.entry(to_cow(ancestor_path)).or_default();
200 child_nodes.entry(to_cow(ancestor_path)).or_default();
201 if let Some(next) = inclusive_ancestor_paths.next() {
201 if let Some(next) = inclusive_ancestor_paths.next() {
202 each_ancestor(child_node);
202 each_ancestor(child_node);
203 ancestor_path = next;
203 ancestor_path = next;
204 child_nodes = &mut child_node.children;
204 child_nodes = &mut child_node.children;
205 } else {
205 } else {
206 return child_node;
206 return child_node;
207 }
207 }
208 }
208 }
209 }
209 }
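For illustration, here is the same descend-and-create pattern from `get_or_insert_node` on a plain `HashMap`-keyed tree; the names and types are simplified stand-ins (the real code keys children by `WithBasename<Cow<HgPath>>` and walks `inclusive_ancestors_of` instead of splitting on `/`):

```rust
use std::collections::HashMap;

// Hypothetical simplified node type.
#[derive(Default)]
struct Node {
    tracked_descendants_count: u32,
    children: HashMap<String, Node>,
}

// Walk "a", then "a/b", then "a/b/c", creating missing nodes with
// `entry().or_default()` and letting the caller update every ancestor.
fn get_or_insert_node<'a>(
    root: &'a mut HashMap<String, Node>,
    path: &str,
    mut each_ancestor: impl FnMut(&mut Node),
) -> &'a mut Node {
    let mut children = root;
    let mut components = path.split('/').peekable();
    loop {
        let component = components.next().expect("non-empty path");
        let child = children.entry(component.to_owned()).or_default();
        if components.peek().is_none() {
            return child;
        }
        each_ancestor(child);
        children = &mut child.children;
    }
}

fn main() {
    let mut root = HashMap::new();
    get_or_insert_node(&mut root, "a/b/c", |ancestor| {
        ancestor.tracked_descendants_count += 1
    });
    assert_eq!(root["a"].tracked_descendants_count, 1);
    assert_eq!(root["a"].children["b"].tracked_descendants_count, 1);
}
```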
210
210
211 fn add_or_remove_file(
211 fn add_or_remove_file(
212 &mut self,
212 &mut self,
213 path: &HgPath,
213 path: &HgPath,
214 old_state: EntryState,
214 old_state: EntryState,
215 new_entry: DirstateEntry,
215 new_entry: DirstateEntry,
216 ) {
216 ) {
217 let tracked_count_increment =
217 let tracked_count_increment =
218 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
218 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
219 (false, true) => 1,
219 (false, true) => 1,
220 (true, false) => -1,
220 (true, false) => -1,
221 _ => 0,
221 _ => 0,
222 };
222 };
223
223
224 let node = Self::get_or_insert_node(
224 let node = Self::get_or_insert_node(
225 &mut self.root,
225 &mut self.root,
226 path,
226 path,
227 WithBasename::to_cow_owned,
227 WithBasename::to_cow_owned,
228 |ancestor| {
228 |ancestor| {
229 // We can’t use `+= increment` because the counter is unsigned,
229 // We can’t use `+= increment` because the counter is unsigned,
230 // and we want debug builds to detect accidental underflow
230 // and we want debug builds to detect accidental underflow
231 // through zero
231 // through zero
232 match tracked_count_increment {
232 match tracked_count_increment {
233 1 => ancestor.tracked_descendants_count += 1,
233 1 => ancestor.tracked_descendants_count += 1,
234 -1 => ancestor.tracked_descendants_count -= 1,
234 -1 => ancestor.tracked_descendants_count -= 1,
235 _ => {}
235 _ => {}
236 }
236 }
237 },
237 },
238 );
238 );
239 if node.entry.is_none() {
239 if node.entry.is_none() {
240 self.nodes_with_entry_count += 1
240 self.nodes_with_entry_count += 1
241 }
241 }
242 node.entry = Some(new_entry)
242 node.entry = Some(new_entry)
243 }
243 }
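The comment in `add_or_remove_file` above explains why the code matches on the increment instead of writing `+= increment`: the counter is unsigned, and debug builds should catch accidental underflow through zero. A tiny standalone version of that idiom:

```rust
// Apply a +1 / -1 / 0 delta to an unsigned counter. Matching keeps the
// arithmetic unsigned, so debug builds panic on accidental underflow
// instead of the bug being hidden behind a signed cast.
fn apply(count: &mut u32, delta: i8) {
    match delta {
        1 => *count += 1,
        -1 => *count -= 1,
        _ => {}
    }
}

fn main() {
    let mut tracked_descendants = 0_u32;
    apply(&mut tracked_descendants, 1);
    apply(&mut tracked_descendants, -1);
    assert_eq!(tracked_descendants, 0);
    // A further `apply(&mut tracked_descendants, -1)` would panic in debug builds.
}
```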
244
244
245 fn iter_nodes<'a>(
245 fn iter_nodes<'a>(
246 &'a self,
246 &'a self,
247 ) -> impl Iterator<Item = (&'a Cow<'on_disk, HgPath>, &'a Node)> + 'a {
247 ) -> impl Iterator<Item = (&'a Cow<'on_disk, HgPath>, &'a Node)> + 'a {
248 // Depth first tree traversal.
248 // Depth first tree traversal.
249 //
249 //
250 // If we could afford internal iteration and recursion,
250 // If we could afford internal iteration and recursion,
251 // this would look like:
251 // this would look like:
252 //
252 //
253 // ```
253 // ```
254 // fn traverse_children(
254 // fn traverse_children(
255 // children: &ChildNodes,
255 // children: &ChildNodes,
256 // each: &mut impl FnMut(&Node),
256 // each: &mut impl FnMut(&Node),
257 // ) {
257 // ) {
258 // for child in children.values() {
258 // for child in children.values() {
259 // traverse_children(&child.children, each);
259 // traverse_children(&child.children, each);
260 // each(child);
260 // each(child);
261 // }
261 // }
262 // }
262 // }
263 // ```
263 // ```
264 //
264 //
265 // However we want an external iterator and therefore can’t use the
265 // However we want an external iterator and therefore can’t use the
266 // call stack. Use an explicit stack instead:
266 // call stack. Use an explicit stack instead:
267 let mut stack = Vec::new();
267 let mut stack = Vec::new();
268 let mut iter = self.root.iter();
268 let mut iter = self.root.iter();
269 std::iter::from_fn(move || {
269 std::iter::from_fn(move || {
270 while let Some((key, child_node)) = iter.next() {
270 while let Some((key, child_node)) = iter.next() {
271 // Pseudo-recursion
271 // Pseudo-recursion
272 let new_iter = child_node.children.iter();
272 let new_iter = child_node.children.iter();
273 let old_iter = std::mem::replace(&mut iter, new_iter);
273 let old_iter = std::mem::replace(&mut iter, new_iter);
274 let key = key.full_path();
274 let key = key.full_path();
275 stack.push((key, child_node, old_iter));
275 stack.push((key, child_node, old_iter));
276 }
276 }
277 // Found the end of a `children.iter()` iterator.
277 // Found the end of a `children.iter()` iterator.
278 if let Some((key, child_node, next_iter)) = stack.pop() {
278 if let Some((key, child_node, next_iter)) = stack.pop() {
279 // "Return" from pseudo-recursion by restoring state from the
279 // "Return" from pseudo-recursion by restoring state from the
280 // explicit stack
280 // explicit stack
281 iter = next_iter;
281 iter = next_iter;
282
282
283 Some((key, child_node))
283 Some((key, child_node))
284 } else {
284 } else {
285 // Reached the bottom of the stack, we’re done
285 // Reached the bottom of the stack, we’re done
286 None
286 None
287 }
287 }
288 })
288 })
289 }
289 }
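The `iter_nodes` comment above describes replacing recursion with an explicit stack so that the traversal can be exposed as an external iterator. A simpler pre-order variant of the same explicit-stack idea on a toy tree (the real iterator additionally keeps the parent iterators on the stack so it can resume them, and yields full paths alongside nodes):

```rust
struct Tree {
    name: &'static str,
    children: Vec<Tree>,
}

// External depth-first iteration: instead of recursing (internal iteration),
// keep pending nodes on an explicit stack and hand out one node per call.
fn iter_depth_first<'a>(root: &'a Tree) -> impl Iterator<Item = &'a Tree> + 'a {
    let mut stack = vec![root];
    std::iter::from_fn(move || {
        let node = stack.pop()?;
        // Push children in reverse so they come out in order, after `node`.
        for child in node.children.iter().rev() {
            stack.push(child);
        }
        Some(node)
    })
}

fn main() {
    let tree = Tree {
        name: "",
        children: vec![
            Tree { name: "a", children: vec![Tree { name: "a/x", children: vec![] }] },
            Tree { name: "b", children: vec![] },
        ],
    };
    let names: Vec<&str> = iter_depth_first(&tree).map(|node| node.name).collect();
    assert_eq!(names, ["", "a", "a/x", "b"]);
}
```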
290
290
291 fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) {
291 fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) {
292 for path in paths {
292 for path in paths {
293 if let Some(node) =
293 if let Some(node) =
294 Self::get_node_mut(&mut self.root, path.as_ref())
294 Self::get_node_mut(&mut self.root, path.as_ref())
295 {
295 {
296 if let Some(entry) = node.entry.as_mut() {
296 if let Some(entry) = node.entry.as_mut() {
297 entry.clear_mtime();
297 entry.clear_mtime();
298 }
298 }
299 }
299 }
300 }
300 }
301 }
301 }
302 }
302 }
303
303
304 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
304 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
305 fn clear(&mut self) {
305 fn clear(&mut self) {
306 self.root.clear();
306 self.root.clear();
307 self.nodes_with_entry_count = 0;
307 self.nodes_with_entry_count = 0;
308 self.nodes_with_copy_source_count = 0;
308 self.nodes_with_copy_source_count = 0;
309 }
309 }
310
310
311 fn add_file(
311 fn add_file(
312 &mut self,
312 &mut self,
313 filename: &HgPath,
313 filename: &HgPath,
314 old_state: EntryState,
314 old_state: EntryState,
315 entry: DirstateEntry,
315 entry: DirstateEntry,
316 ) -> Result<(), DirstateMapError> {
316 ) -> Result<(), DirstateMapError> {
317 self.add_or_remove_file(filename, old_state, entry);
317 self.add_or_remove_file(filename, old_state, entry);
318 Ok(())
318 Ok(())
319 }
319 }
320
320
321 fn remove_file(
321 fn remove_file(
322 &mut self,
322 &mut self,
323 filename: &HgPath,
323 filename: &HgPath,
324 old_state: EntryState,
324 old_state: EntryState,
325 size: i32,
325 size: i32,
326 ) -> Result<(), DirstateMapError> {
326 ) -> Result<(), DirstateMapError> {
327 let entry = DirstateEntry {
327 let entry = DirstateEntry {
328 state: EntryState::Removed,
328 state: EntryState::Removed,
329 mode: 0,
329 mode: 0,
330 size,
330 size,
331 mtime: 0,
331 mtime: 0,
332 };
332 };
333 self.add_or_remove_file(filename, old_state, entry);
333 self.add_or_remove_file(filename, old_state, entry);
334 Ok(())
334 Ok(())
335 }
335 }
336
336
337 fn drop_file(
337 fn drop_file(
338 &mut self,
338 &mut self,
339 filename: &HgPath,
339 filename: &HgPath,
340 old_state: EntryState,
340 old_state: EntryState,
341 ) -> Result<bool, DirstateMapError> {
341 ) -> Result<bool, DirstateMapError> {
342 struct Dropped {
342 struct Dropped {
343 was_tracked: bool,
343 was_tracked: bool,
344 had_entry: bool,
344 had_entry: bool,
345 had_copy_source: bool,
345 had_copy_source: bool,
346 }
346 }
347 fn recur(nodes: &mut ChildNodes, path: &HgPath) -> Option<Dropped> {
347 fn recur(nodes: &mut ChildNodes, path: &HgPath) -> Option<Dropped> {
348 let (first_path_component, rest_of_path) =
348 let (first_path_component, rest_of_path) =
349 path.split_first_component();
349 path.split_first_component();
350 let node = nodes.get_mut(first_path_component)?;
350 let node = nodes.get_mut(first_path_component)?;
351 let dropped;
351 let dropped;
352 if let Some(rest) = rest_of_path {
352 if let Some(rest) = rest_of_path {
353 dropped = recur(&mut node.children, rest)?;
353 dropped = recur(&mut node.children, rest)?;
354 if dropped.was_tracked {
354 if dropped.was_tracked {
355 node.tracked_descendants_count -= 1;
355 node.tracked_descendants_count -= 1;
356 }
356 }
357 } else {
357 } else {
358 dropped = Dropped {
358 dropped = Dropped {
359 was_tracked: node
359 was_tracked: node
360 .entry
360 .entry
361 .as_ref()
361 .as_ref()
362 .map_or(false, |entry| entry.state.is_tracked()),
362 .map_or(false, |entry| entry.state.is_tracked()),
363 had_entry: node.entry.take().is_some(),
363 had_entry: node.entry.take().is_some(),
364 had_copy_source: node.copy_source.take().is_some(),
364 had_copy_source: node.copy_source.take().is_some(),
365 };
365 };
366 }
366 }
367 // After recursion, for both leaf (rest_of_path is None) nodes and
367 // After recursion, for both leaf (rest_of_path is None) nodes and
368 // parent nodes, remove a node if it just became empty.
368 // parent nodes, remove a node if it just became empty.
369 if node.entry.is_none()
369 if node.entry.is_none()
370 && node.copy_source.is_none()
370 && node.copy_source.is_none()
371 && node.children.is_empty()
371 && node.children.is_empty()
372 {
372 {
373 nodes.remove(first_path_component);
373 nodes.remove(first_path_component);
374 }
374 }
375 Some(dropped)
375 Some(dropped)
376 }
376 }
377
377
378 if let Some(dropped) = recur(&mut self.root, filename) {
378 if let Some(dropped) = recur(&mut self.root, filename) {
379 if dropped.had_entry {
379 if dropped.had_entry {
380 self.nodes_with_entry_count -= 1
380 self.nodes_with_entry_count -= 1
381 }
381 }
382 if dropped.had_copy_source {
382 if dropped.had_copy_source {
383 self.nodes_with_copy_source_count -= 1
383 self.nodes_with_copy_source_count -= 1
384 }
384 }
385 Ok(dropped.had_entry)
385 Ok(dropped.had_entry)
386 } else {
386 } else {
387 debug_assert!(!old_state.is_tracked());
387 debug_assert!(!old_state.is_tracked());
388 Ok(false)
388 Ok(false)
389 }
389 }
390 }
390 }
391
391
392 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
392 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
393 for filename in filenames {
393 for filename in filenames {
394 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
394 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
395 if let Some(entry) = node.entry.as_mut() {
395 if let Some(entry) = node.entry.as_mut() {
396 entry.clear_ambiguous_mtime(now);
396 entry.clear_ambiguous_mtime(now);
397 }
397 }
398 }
398 }
399 }
399 }
400 }
400 }
401
401
402 fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool {
402 fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool {
403 self.get_node(key)
403 self.get_node(key)
404 .and_then(|node| node.entry.as_ref())
404 .and_then(|node| node.entry.as_ref())
405 .map_or(false, DirstateEntry::is_non_normal)
405 .map_or(false, DirstateEntry::is_non_normal)
406 }
406 }
407
407
408 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
408 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
409 // Do nothing, this `DirstateMap` does not have a separate "non normal
409 // Do nothing, this `DirstateMap` does not have a separate "non normal
410 // entries" set that need to be kept up to date
410 // entries" set that need to be kept up to date
411 }
411 }
412
412
413 fn non_normal_or_other_parent_paths(
413 fn non_normal_or_other_parent_paths(
414 &mut self,
414 &mut self,
415 ) -> Box<dyn Iterator<Item = &HgPath> + '_> {
415 ) -> Box<dyn Iterator<Item = &HgPath> + '_> {
416 Box::new(self.iter_nodes().filter_map(|(path, node)| {
416 Box::new(self.iter_nodes().filter_map(|(path, node)| {
417 node.entry
417 node.entry
418 .as_ref()
418 .as_ref()
419 .filter(|entry| {
419 .filter(|entry| {
420 entry.is_non_normal() || entry.is_from_other_parent()
420 entry.is_non_normal() || entry.is_from_other_parent()
421 })
421 })
422 .map(|_| &**path)
422 .map(|_| &**path)
423 }))
423 }))
424 }
424 }
425
425
426 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
426 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
427 // Do nothing, this `DirstateMap` does not have a separate "non normal
427 // Do nothing, this `DirstateMap` does not have a separate "non normal
428 // entries" and "from other parent" sets that need to be recomputed
428 // entries" and "from other parent" sets that need to be recomputed
429 }
429 }
430
430
431 fn iter_non_normal_paths(
431 fn iter_non_normal_paths(
432 &mut self,
432 &mut self,
433 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
433 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
434 self.iter_non_normal_paths_panic()
434 self.iter_non_normal_paths_panic()
435 }
435 }
436
436
437 fn iter_non_normal_paths_panic(
437 fn iter_non_normal_paths_panic(
438 &self,
438 &self,
439 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
439 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
440 Box::new(self.iter_nodes().filter_map(|(path, node)| {
440 Box::new(self.iter_nodes().filter_map(|(path, node)| {
441 node.entry
441 node.entry
442 .as_ref()
442 .as_ref()
443 .filter(|entry| entry.is_non_normal())
443 .filter(|entry| entry.is_non_normal())
444 .map(|_| &**path)
444 .map(|_| &**path)
445 }))
445 }))
446 }
446 }
447
447
448 fn iter_other_parent_paths(
448 fn iter_other_parent_paths(
449 &mut self,
449 &mut self,
450 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
450 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
451 Box::new(self.iter_nodes().filter_map(|(path, node)| {
451 Box::new(self.iter_nodes().filter_map(|(path, node)| {
452 node.entry
452 node.entry
453 .as_ref()
453 .as_ref()
454 .filter(|entry| entry.is_from_other_parent())
454 .filter(|entry| entry.is_from_other_parent())
455 .map(|_| &**path)
455 .map(|_| &**path)
456 }))
456 }))
457 }
457 }
458
458
459 fn has_tracked_dir(
459 fn has_tracked_dir(
460 &mut self,
460 &mut self,
461 directory: &HgPath,
461 directory: &HgPath,
462 ) -> Result<bool, DirstateMapError> {
462 ) -> Result<bool, DirstateMapError> {
463 if let Some(node) = self.get_node(directory) {
463 if let Some(node) = self.get_node(directory) {
464 // A node without a `DirstateEntry` was created to hold child
464 // A node without a `DirstateEntry` was created to hold child
465 // nodes, and is therefore a directory.
465 // nodes, and is therefore a directory.
466 Ok(node.entry.is_none() && node.tracked_descendants_count > 0)
466 Ok(node.entry.is_none() && node.tracked_descendants_count > 0)
467 } else {
467 } else {
468 Ok(false)
468 Ok(false)
469 }
469 }
470 }
470 }
471
471
472 fn has_dir(
472 fn has_dir(
473 &mut self,
473 &mut self,
474 directory: &HgPath,
474 directory: &HgPath,
475 ) -> Result<bool, DirstateMapError> {
475 ) -> Result<bool, DirstateMapError> {
476 if let Some(node) = self.get_node(directory) {
476 if let Some(node) = self.get_node(directory) {
477 // A node without a `DirstateEntry` was created to hold child
477 // A node without a `DirstateEntry` was created to hold child
478 // nodes, and is therefore a directory.
478 // nodes, and is therefore a directory.
479 Ok(node.entry.is_none())
479 Ok(node.entry.is_none())
480 } else {
480 } else {
481 Ok(false)
481 Ok(false)
482 }
482 }
483 }
483 }
484
484
485 #[timed]
485 #[timed]
486 fn pack_v1(
486 fn pack_v1(
487 &mut self,
487 &mut self,
488 parents: DirstateParents,
488 parents: DirstateParents,
489 now: Timestamp,
489 now: Timestamp,
490 ) -> Result<Vec<u8>, DirstateError> {
490 ) -> Result<Vec<u8>, DirstateError> {
491 let now: i32 = now.0.try_into().expect("time overflow");
491 let now: i32 = now.0.try_into().expect("time overflow");
492 let mut ambiguous_mtimes = Vec::new();
492 let mut ambiguous_mtimes = Vec::new();
493 // Optimization (to be measured?): pre-compute size to avoid `Vec`
493 // Optimization (to be measured?): pre-compute size to avoid `Vec`
494 // reallocations
494 // reallocations
495 let mut size = parents.as_bytes().len();
495 let mut size = parents.as_bytes().len();
496 for (path, node) in self.iter_nodes() {
496 for (path, node) in self.iter_nodes() {
497 if let Some(entry) = &node.entry {
497 if let Some(entry) = &node.entry {
498 size += packed_entry_size(
498 size += packed_entry_size(
499 path,
499 path,
500 node.copy_source.as_ref().map(|p| &**p),
500 node.copy_source.as_ref().map(|p| &**p),
501 );
501 );
502 if entry.mtime_is_ambiguous(now) {
502 if entry.mtime_is_ambiguous(now) {
503 ambiguous_mtimes.push(path.clone())
503 ambiguous_mtimes.push(path.clone())
504 }
504 }
505 }
505 }
506 }
506 }
507 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes);
507 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes);
508
508
509 let mut packed = Vec::with_capacity(size);
509 let mut packed = Vec::with_capacity(size);
510 packed.extend(parents.as_bytes());
510 packed.extend(parents.as_bytes());
511
511
512 for (path, node) in self.iter_nodes() {
512 for (path, node) in self.iter_nodes() {
513 if let Some(entry) = &node.entry {
513 if let Some(entry) = &node.entry {
514 pack_entry(
514 pack_entry(
515 path,
515 path,
516 entry,
516 entry,
517 node.copy_source.as_ref().map(|p| &**p),
517 node.copy_source.as_ref().map(|p| &**p),
518 &mut packed,
518 &mut packed,
519 );
519 );
520 }
520 }
521 }
521 }
522 Ok(packed)
522 Ok(packed)
523 }
523 }
524
524
525 #[timed]
525 #[timed]
526 fn pack_v2(
526 fn pack_v2(
527 &mut self,
527 &mut self,
528 parents: DirstateParents,
528 parents: DirstateParents,
529 now: Timestamp,
529 now: Timestamp,
530 ) -> Result<Vec<u8>, DirstateError> {
530 ) -> Result<Vec<u8>, DirstateError> {
531 // TODO: how do we want to handle this in 2038?
531 // TODO: how do we want to handle this in 2038?
532 let now: i32 = now.0.try_into().expect("time overflow");
532 let now: i32 = now.0.try_into().expect("time overflow");
533 let mut paths = Vec::new();
533 let mut paths = Vec::new();
534 for (path, node) in self.iter_nodes() {
534 for (path, node) in self.iter_nodes() {
535 if let Some(entry) = &node.entry {
535 if let Some(entry) = &node.entry {
536 if entry.mtime_is_ambiguous(now) {
536 if entry.mtime_is_ambiguous(now) {
537 paths.push(path.clone())
537 paths.push(path.clone())
538 }
538 }
539 }
539 }
540 }
540 }
541 // Borrow of `self` ends here since we collect cloned paths
541 // Borrow of `self` ends here since we collect cloned paths
542
542
543 self.clear_known_ambiguous_mtimes(&paths);
543 self.clear_known_ambiguous_mtimes(&paths);
544
544
545 on_disk::write(self, parents)
545 on_disk::write(self, parents)
546 }
546 }
547
547
548 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
548 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
549 // Do nothing, this `DirstateMap` does not have a separate `all_dirs` that
549 // Do nothing, this `DirstateMap` does not have a separate `all_dirs` that
550 // needs to be recomputed
550 // needs to be recomputed
551 Ok(())
551 Ok(())
552 }
552 }
553
553
554 fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
554 fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
555 // Do nothing, this `DirstateMap` does not have a separate `dirs` that needs
555 // Do nothing, this `DirstateMap` does not have a separate `dirs` that needs
556 // to be recomputed
556 // to be recomputed
557 Ok(())
557 Ok(())
558 }
558 }
559
559
560 fn status<'a>(
560 fn status<'a>(
561 &'a mut self,
561 &'a mut self,
562 matcher: &'a (dyn Matcher + Sync),
562 matcher: &'a (dyn Matcher + Sync),
563 root_dir: PathBuf,
563 root_dir: PathBuf,
564 ignore_files: Vec<PathBuf>,
564 ignore_files: Vec<PathBuf>,
565 options: StatusOptions,
565 options: StatusOptions,
566 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
566 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
567 {
567 {
568 super::status::status(self, matcher, root_dir, ignore_files, options)
568 super::status::status(self, matcher, root_dir, ignore_files, options)
569 }
569 }
570
570
571 fn copy_map_len(&self) -> usize {
571 fn copy_map_len(&self) -> usize {
572 self.nodes_with_copy_source_count as usize
572 self.nodes_with_copy_source_count as usize
573 }
573 }
574
574
575 fn copy_map_iter(&self) -> CopyMapIter<'_> {
575 fn copy_map_iter(&self) -> CopyMapIter<'_> {
576 Box::new(self.iter_nodes().filter_map(|(path, node)| {
576 Box::new(self.iter_nodes().filter_map(|(path, node)| {
577 node.copy_source
577 node.copy_source
578 .as_ref()
578 .as_ref()
579 .map(|copy_source| (&**path, &**copy_source))
579 .map(|copy_source| (&**path, &**copy_source))
580 }))
580 }))
581 }
581 }
582
582
583 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
583 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
584 if let Some(node) = self.get_node(key) {
584 if let Some(node) = self.get_node(key) {
585 node.copy_source.is_some()
585 node.copy_source.is_some()
586 } else {
586 } else {
587 false
587 false
588 }
588 }
589 }
589 }
590
590
591 fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> {
591 fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> {
592 self.get_node(key)?.copy_source.as_ref().map(|p| &**p)
592 self.get_node(key)?.copy_source.as_ref().map(|p| &**p)
593 }
593 }
594
594
595 fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> {
595 fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> {
596 let count = &mut self.nodes_with_copy_source_count;
596 let count = &mut self.nodes_with_copy_source_count;
597 Self::get_node_mut(&mut self.root, key).and_then(|node| {
597 Self::get_node_mut(&mut self.root, key).and_then(|node| {
598 if node.copy_source.is_some() {
598 if node.copy_source.is_some() {
599 *count -= 1
599 *count -= 1
600 }
600 }
601 node.copy_source.take().map(Cow::into_owned)
601 node.copy_source.take().map(Cow::into_owned)
602 })
602 })
603 }
603 }
604
604
605 fn copy_map_insert(
605 fn copy_map_insert(
606 &mut self,
606 &mut self,
607 key: HgPathBuf,
607 key: HgPathBuf,
608 value: HgPathBuf,
608 value: HgPathBuf,
609 ) -> Option<HgPathBuf> {
609 ) -> Option<HgPathBuf> {
610 let node = Self::get_or_insert_node(
610 let node = Self::get_or_insert_node(
611 &mut self.root,
611 &mut self.root,
612 &key,
612 &key,
613 WithBasename::to_cow_owned,
613 WithBasename::to_cow_owned,
614 |_ancestor| {},
614 |_ancestor| {},
615 );
615 );
616 if node.copy_source.is_none() {
616 if node.copy_source.is_none() {
617 self.nodes_with_copy_source_count += 1
617 self.nodes_with_copy_source_count += 1
618 }
618 }
619 node.copy_source.replace(value.into()).map(Cow::into_owned)
619 node.copy_source.replace(value.into()).map(Cow::into_owned)
620 }
620 }
621
621
622 fn len(&self) -> usize {
622 fn len(&self) -> usize {
623 self.nodes_with_entry_count as usize
623 self.nodes_with_entry_count as usize
624 }
624 }
625
625
626 fn contains_key(&self, key: &HgPath) -> bool {
626 fn contains_key(&self, key: &HgPath) -> bool {
627 self.get(key).is_some()
627 self.get(key).is_some()
628 }
628 }
629
629
630 fn get(&self, key: &HgPath) -> Option<&DirstateEntry> {
630 fn get(&self, key: &HgPath) -> Option<&DirstateEntry> {
631 self.get_node(key)?.entry.as_ref()
631 self.get_node(key)?.entry.as_ref()
632 }
632 }
633
633
634 fn iter(&self) -> StateMapIter<'_> {
634 fn iter(&self) -> StateMapIter<'_> {
635 Box::new(self.iter_nodes().filter_map(|(path, node)| {
635 Box::new(self.iter_nodes().filter_map(|(path, node)| {
636 node.entry.as_ref().map(|entry| (&**path, entry))
636 node.entry.as_ref().map(|entry| (&**path, entry))
637 }))
637 }))
638 }
638 }
639 }
639 }
@@ -1,326 +1,326 b''
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! # File format
3 //! # File format
4 //!
4 //!
5 //! The file starts with a fixed-sized header, whose layout is defined by the
5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 //! the nodes representing the files and directories at the root of the
7 //! the nodes representing the files and directories at the root of the
8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 //! Nodes in turn contain slices to variable-size paths, and to their own child
9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 //! nodes (if any) for nested files and directories.
10 //! nodes (if any) for nested files and directories.
11
11
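For readers unfamiliar with this kind of format, here is a hypothetical, std-only illustration of the offset + length "slice" scheme described above; the real code derives `BytesCast` and reads whole structs rather than individual integers:

```rust
// Read a big-endian u32 at byte offset `at`, with bounds checking.
fn read_u32_be(buf: &[u8], at: usize) -> Option<u32> {
    let b = buf.get(at..at + 4)?;
    Some(u32::from_be_bytes([b[0], b[1], b[2], b[3]]))
}

// Resolve a (start, len) pair, both counted in bytes from the start of the
// file, into a sub-slice of the on-disk buffer.
fn resolve_slice(on_disk: &[u8], start: u32, len: u32) -> Option<&[u8]> {
    on_disk.get(start as usize..)?.get(..len as usize)
}

fn main() {
    // Toy 8-byte "header": a big-endian start offset and length, followed by
    // the variable-size data they point to.
    let file = [0, 0, 0, 8, 0, 0, 0, 3, b'f', b'o', b'o'];
    let start = read_u32_be(&file, 0).unwrap();
    let len = read_u32_be(&file, 4).unwrap();
    assert_eq!(resolve_slice(&file, start, len), Some(&b"foo"[..]));
}
```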
12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap};
12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap};
13 use crate::dirstate_tree::path_with_basename::WithBasename;
13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 use crate::errors::HgError;
14 use crate::errors::HgError;
15 use crate::utils::hg_path::HgPath;
15 use crate::utils::hg_path::HgPath;
16 use crate::DirstateEntry;
16 use crate::DirstateEntry;
17 use crate::DirstateError;
17 use crate::DirstateError;
18 use crate::DirstateParents;
18 use crate::DirstateParents;
19 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
19 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
20 use bytes_cast::BytesCast;
20 use bytes_cast::BytesCast;
21 use std::borrow::Cow;
21 use std::borrow::Cow;
22 use std::convert::{TryFrom, TryInto};
22 use std::convert::{TryFrom, TryInto};
23
23
24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 /// This is a redundant sanity check more than an actual "magic number" since
25 /// This is a redundant sanity check more than an actual "magic number" since
26 /// `.hg/requires` already governs which format should be used.
26 /// `.hg/requires` already governs which format should be used.
27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28
28
29 #[derive(BytesCast)]
29 #[derive(BytesCast)]
30 #[repr(C)]
30 #[repr(C)]
31 struct Header {
31 struct Header {
32 marker: [u8; V2_FORMAT_MARKER.len()],
32 marker: [u8; V2_FORMAT_MARKER.len()],
33
33
34 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
34 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
35 /// `parents` field being at this offset, immediately after `marker`.
35 /// `parents` field being at this offset, immediately after `marker`.
36 parents: DirstateParents,
36 parents: DirstateParents,
37
37
38 root: ChildNodes,
38 root: ChildNodes,
39 nodes_with_entry_count: Size,
39 nodes_with_entry_count: Size,
40 nodes_with_copy_source_count: Size,
40 nodes_with_copy_source_count: Size,
41 }
41 }
42
42
43 #[derive(BytesCast)]
43 #[derive(BytesCast)]
44 #[repr(C)]
44 #[repr(C)]
45 struct Node {
45 struct Node {
46 full_path: PathSlice,
46 full_path: PathSlice,
47
47
48 /// In bytes from `self.full_path.start`
48 /// In bytes from `self.full_path.start`
49 base_name_start: Size,
49 base_name_start: Size,
50
50
51 copy_source: OptPathSlice,
51 copy_source: OptPathSlice,
52 entry: OptEntry,
52 entry: OptEntry,
53 children: ChildNodes,
53 children: ChildNodes,
54 tracked_descendants_count: Size,
54 tracked_descendants_count: Size,
55 }
55 }
56
56
57 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
57 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
58 /// format
58 /// format
59 #[derive(BytesCast)]
59 #[derive(BytesCast)]
60 #[repr(C)]
60 #[repr(C)]
61 struct OptEntry {
61 struct OptEntry {
62 state: u8,
62 state: u8,
63 mode: I32Be,
63 mode: I32Be,
64 mtime: I32Be,
64 mtime: I32Be,
65 size: I32Be,
65 size: I32Be,
66 }
66 }
67
67
68 /// Counted in bytes from the start of the file
68 /// Counted in bytes from the start of the file
69 ///
69 ///
70 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
70 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
71 /// we could save space by using `U32Be` instead.
71 /// we could save space by using `U32Be` instead.
72 type Offset = U64Be;
72 type Offset = U64Be;
73
73
74 /// Counted in number of items
74 /// Counted in number of items
75 ///
75 ///
76 /// NOTE: not supporting directories with more than 4 billion direct children,
76 /// NOTE: not supporting directories with more than 4 billion direct children,
77 /// or filenames longer than 4 GiB.
77 /// or filenames longer than 4 GiB.
78 type Size = U32Be;
78 type Size = U32Be;
79
79
80 /// Location of consecutive, fixed-size items.
80 /// Location of consecutive, fixed-size items.
81 ///
81 ///
82 /// An item can be a single byte for paths, or a struct with
82 /// An item can be a single byte for paths, or a struct with
83 /// `derive(BytesCast)`.
83 /// `derive(BytesCast)`.
84 #[derive(BytesCast, Copy, Clone)]
84 #[derive(BytesCast, Copy, Clone)]
85 #[repr(C)]
85 #[repr(C)]
86 struct Slice {
86 struct Slice {
87 start: Offset,
87 start: Offset,
88 len: Size,
88 len: Size,
89 }
89 }
90
90
91 /// A contiguous sequence of `len` times `Node`, representing the child nodes
91 /// A contiguous sequence of `len` times `Node`, representing the child nodes
92 /// of either some other node or of the repository root.
92 /// of either some other node or of the repository root.
93 ///
93 ///
94 /// Always sorted by ascending `full_path`, to allow binary search.
94 /// Always sorted by ascending `full_path`, to allow binary search.
95 /// Since nodes with the same parent node also have the same parent path,
95 /// Since nodes with the same parent node also have the same parent path,
96 /// only the `base_name`s need to be compared during binary search.
96 /// only the `base_name`s need to be compared during binary search.
97 type ChildNodes = Slice;
97 type ChildNodes = Slice;
98
98
99 /// A `HgPath` of `len` bytes
99 /// A `HgPath` of `len` bytes
100 type PathSlice = Slice;
100 type PathSlice = Slice;
101
101
102 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
102 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
103 type OptPathSlice = Slice;
103 type OptPathSlice = Slice;
104
104
105 /// Make sure that size-affecting changes are made knowingly
105 /// Make sure that size-affecting changes are made knowingly
106 fn _static_assert_size_of() {
106 fn _static_assert_size_of() {
107 let _ = std::mem::transmute::<Header, [u8; 72]>;
107 let _ = std::mem::transmute::<Header, [u8; 72]>;
108 let _ = std::mem::transmute::<Node, [u8; 57]>;
108 let _ = std::mem::transmute::<Node, [u8; 57]>;
109 }
109 }
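The `transmute` trick above deserves a note: a `transmute` between types of different sizes does not compile, so merely naming `transmute::<Header, [u8; 72]>` turns the expected struct size into a compile-time assertion. The same trick on a hypothetical struct:

```rust
#[repr(C)]
struct Example {
    a: u32,
    b: u64,
}

// Naming `transmute` with explicit types only compiles when both types have
// the same size, so this line breaks the build if `Example` ever stops being
// exactly 16 bytes (4 bytes of `a`, 4 bytes of padding, 8 bytes of `b`).
fn _static_assert_size_of_example() {
    let _ = std::mem::transmute::<Example, [u8; 16]>;
}

fn main() {}
```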
110
110
111 pub(super) fn read<'on_disk>(
111 pub(super) fn read<'on_disk>(
112 on_disk: &'on_disk [u8],
112 on_disk: &'on_disk [u8],
113 ) -> Result<(DirstateMap<'on_disk>, Option<DirstateParents>), DirstateError> {
113 ) -> Result<(DirstateMap<'on_disk>, Option<DirstateParents>), DirstateError> {
114 if on_disk.is_empty() {
114 if on_disk.is_empty() {
115 return Ok((DirstateMap::empty(on_disk), None));
115 return Ok((DirstateMap::empty(on_disk), None));
116 }
116 }
117 let (header, _) = Header::from_bytes(on_disk)
117 let (header, _) = Header::from_bytes(on_disk)
118 .map_err(|_| HgError::corrupted("truncated dirstate-v2"))?;
118 .map_err(|_| HgError::corrupted("truncated dirstate-v2"))?;
119 let Header {
119 let Header {
120 marker,
120 marker,
121 parents,
121 parents,
122 root,
122 root,
123 nodes_with_entry_count,
123 nodes_with_entry_count,
124 nodes_with_copy_source_count,
124 nodes_with_copy_source_count,
125 } = header;
125 } = header;
126 if marker != V2_FORMAT_MARKER {
126 if marker != V2_FORMAT_MARKER {
127 return Err(HgError::corrupted("missing dirstate-v2 marker").into());
127 return Err(HgError::corrupted("missing dirstate-v2 marker").into());
128 }
128 }
129 let dirstate_map = DirstateMap {
129 let dirstate_map = DirstateMap {
130 on_disk,
130 on_disk,
131 root: read_nodes(on_disk, *root)?,
131 root: read_nodes(on_disk, *root)?,
132 nodes_with_entry_count: nodes_with_entry_count.get(),
132 nodes_with_entry_count: nodes_with_entry_count.get(),
133 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
133 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
134 };
134 };
135 let parents = Some(parents.clone());
135 let parents = Some(parents.clone());
136 Ok((dirstate_map, parents))
136 Ok((dirstate_map, parents))
137 }
137 }
138
138
139 impl Node {
139 impl Node {
140 pub(super) fn path<'on_disk>(
140 pub(super) fn path<'on_disk>(
141 &self,
141 &self,
142 on_disk: &'on_disk [u8],
142 on_disk: &'on_disk [u8],
143 ) -> Result<dirstate_map::NodeKey<'on_disk>, HgError> {
143 ) -> Result<dirstate_map::NodeKey<'on_disk>, HgError> {
144 let full_path = read_hg_path(on_disk, self.full_path)?;
144 let full_path = read_hg_path(on_disk, self.full_path)?;
145 let base_name_start = usize::try_from(self.base_name_start.get())
145 let base_name_start = usize::try_from(self.base_name_start.get())
146 // u32 -> usize, could only panic on a 16-bit CPU
146 // u32 -> usize, could only panic on a 16-bit CPU
147 .expect("dirstate-v2 base_name_start out of bounds");
147 .expect("dirstate-v2 base_name_start out of bounds");
148 if base_name_start < full_path.len() {
148 if base_name_start < full_path.len() {
149 Ok(WithBasename::from_raw_parts(full_path, base_name_start))
149 Ok(WithBasename::from_raw_parts(full_path, base_name_start))
150 } else {
150 } else {
151 Err(HgError::corrupted(
151 Err(HgError::corrupted(
152 "dirstate-v2 base_name_start out of bounds",
152 "dirstate-v2 base_name_start out of bounds",
153 ))
153 ))
154 }
154 }
155 }
155 }
156
156
157 pub(super) fn copy_source<'on_disk>(
157 pub(super) fn copy_source<'on_disk>(
158 &self,
158 &self,
159 on_disk: &'on_disk [u8],
159 on_disk: &'on_disk [u8],
160 ) -> Result<Option<Cow<'on_disk, HgPath>>, HgError> {
160 ) -> Result<Option<Cow<'on_disk, HgPath>>, HgError> {
161 Ok(if self.copy_source.start.get() != 0 {
161 Ok(if self.copy_source.start.get() != 0 {
162 Some(read_hg_path(on_disk, self.copy_source)?)
162 Some(read_hg_path(on_disk, self.copy_source)?)
163 } else {
163 } else {
164 None
164 None
165 })
165 })
166 }
166 }
167
167
168 pub(super) fn entry(&self) -> Result<Option<DirstateEntry>, HgError> {
168 pub(super) fn entry(&self) -> Result<Option<DirstateEntry>, HgError> {
169 Ok(if self.entry.state != b'\0' {
169 Ok(if self.entry.state != b'\0' {
170 Some(DirstateEntry {
170 Some(DirstateEntry {
171 state: self.entry.state.try_into()?,
171 state: self.entry.state.try_into()?,
172 mode: self.entry.mode.get(),
172 mode: self.entry.mode.get(),
173 mtime: self.entry.mtime.get(),
173 mtime: self.entry.mtime.get(),
174 size: self.entry.size.get(),
174 size: self.entry.size.get(),
175 })
175 })
176 } else {
176 } else {
177 None
177 None
178 })
178 })
179 }
179 }
180
180
181 pub(super) fn to_in_memory_node<'on_disk>(
181 pub(super) fn to_in_memory_node<'on_disk>(
182 &self,
182 &self,
183 on_disk: &'on_disk [u8],
183 on_disk: &'on_disk [u8],
184 ) -> Result<dirstate_map::Node<'on_disk>, HgError> {
184 ) -> Result<dirstate_map::Node<'on_disk>, HgError> {
185 Ok(dirstate_map::Node {
185 Ok(dirstate_map::Node {
186 children: read_nodes(on_disk, self.children)?,
186 children: read_nodes(on_disk, self.children)?,
187 copy_source: self.copy_source(on_disk)?,
187 copy_source: self.copy_source(on_disk)?,
188 entry: self.entry()?,
188 entry: self.entry()?,
189 tracked_descendants_count: self.tracked_descendants_count.get(),
189 tracked_descendants_count: self.tracked_descendants_count.get(),
190 })
190 })
191 }
191 }
192 }
192 }
193
193
194 fn read_nodes(
194 fn read_nodes(
195 on_disk: &[u8],
195 on_disk: &[u8],
196 slice: ChildNodes,
196 slice: ChildNodes,
197 ) -> Result<dirstate_map::ChildNodes, HgError> {
197 ) -> Result<dirstate_map::ChildNodes, HgError> {
198 read_slice::<Node>(on_disk, slice)?
198 read_slice::<Node>(on_disk, slice)?
199 .iter()
199 .iter()
200 .map(|node| {
200 .map(|node| {
201 Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?))
201 Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?))
202 })
202 })
203 .collect()
203 .collect()
204 }
204 }
205
205
206 fn read_hg_path(on_disk: &[u8], slice: Slice) -> Result<Cow<HgPath>, HgError> {
206 fn read_hg_path(on_disk: &[u8], slice: Slice) -> Result<Cow<HgPath>, HgError> {
207 let bytes = read_slice::<u8>(on_disk, slice)?;
207 let bytes = read_slice::<u8>(on_disk, slice)?;
208 Ok(Cow::Borrowed(HgPath::new(bytes)))
208 Ok(Cow::Borrowed(HgPath::new(bytes)))
209 }
209 }
210
210
211 fn read_slice<T>(on_disk: &[u8], slice: Slice) -> Result<&[T], HgError>
211 fn read_slice<T>(on_disk: &[u8], slice: Slice) -> Result<&[T], HgError>
212 where
212 where
213 T: BytesCast,
213 T: BytesCast,
214 {
214 {
215 // Either way, `usize::MAX` would result in an "out of bounds" error, since a
215 // Either way, `usize::MAX` would result in an "out of bounds" error, since a
216 // single `&[u8]` cannot occupy the entire address space.
216 // single `&[u8]` cannot occupy the entire address space.
217 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
217 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
218 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
218 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
219 on_disk
219 on_disk
220 .get(start..)
220 .get(start..)
221 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
221 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
222 .map(|(slice, _rest)| slice)
222 .map(|(slice, _rest)| slice)
223 .ok_or_else(|| {
223 .ok_or_else(|| {
224 HgError::corrupted("dirstate v2 slice is out of bounds")
224 HgError::corrupted("dirstate v2 slice is out of bounds")
225 })
225 })
226 }
226 }
227
227
228 pub(super) fn write(
228 pub(super) fn write(
229 dirstate_map: &mut DirstateMap,
229 dirstate_map: &mut DirstateMap,
230 parents: DirstateParents,
230 parents: DirstateParents,
231 ) -> Result<Vec<u8>, DirstateError> {
231 ) -> Result<Vec<u8>, DirstateError> {
232 let header_len = std::mem::size_of::<Header>();
232 let header_len = std::mem::size_of::<Header>();
233
233
234 // This ignores the space for paths, and for nodes without an entry.
234 // This ignores the space for paths, and for nodes without an entry.
235 // TODO: better estimate? Skip the `Vec` and write to a file directly?
235 // TODO: better estimate? Skip the `Vec` and write to a file directly?
236 let size_guess = header_len
236 let size_guess = header_len
237 + std::mem::size_of::<Node>()
237 + std::mem::size_of::<Node>()
238 * dirstate_map.nodes_with_entry_count as usize;
238 * dirstate_map.nodes_with_entry_count as usize;
239 let mut out = Vec::with_capacity(size_guess);
239 let mut out = Vec::with_capacity(size_guess);
240
240
241 // Keep space for the header. We’ll fill it out at the end when we know the
241 // Keep space for the header. We’ll fill it out at the end when we know the
242 // actual offset for the root nodes.
242 // actual offset for the root nodes.
243 out.resize(header_len, 0_u8);
243 out.resize(header_len, 0_u8);
244
244
245 let root = write_nodes(&mut dirstate_map.root, &mut out)?;
245 let root = write_nodes(&mut dirstate_map.root, &mut out)?;
246
246
247 let header = Header {
247 let header = Header {
248 marker: *V2_FORMAT_MARKER,
248 marker: *V2_FORMAT_MARKER,
249 parents: parents,
249 parents: parents,
250 root,
250 root,
251 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
251 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
252 nodes_with_copy_source_count: dirstate_map
252 nodes_with_copy_source_count: dirstate_map
253 .nodes_with_copy_source_count
253 .nodes_with_copy_source_count
254 .into(),
254 .into(),
255 };
255 };
256 out[..header_len].copy_from_slice(header.as_bytes());
256 out[..header_len].copy_from_slice(header.as_bytes());
257 Ok(out)
257 Ok(out)
258 }
258 }
259
259
260 fn write_nodes(
260 fn write_nodes(
261 nodes: &mut dirstate_map::ChildNodes,
261 nodes: &dirstate_map::ChildNodes,
262 out: &mut Vec<u8>,
262 out: &mut Vec<u8>,
263 ) -> Result<ChildNodes, DirstateError> {
263 ) -> Result<ChildNodes, DirstateError> {
264 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
264 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
265 // order. Sort to enable binary search in the written file.
265 // order. Sort to enable binary search in the written file.
266 let nodes = dirstate_map::Node::sorted(nodes);
266 let nodes = dirstate_map::Node::sorted(nodes);
267
267
268 // First accumulate serialized nodes in a `Vec`
268 // First accumulate serialized nodes in a `Vec`
269 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
269 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
270 for (full_path, node) in nodes {
270 for (full_path, node) in nodes {
271 on_disk_nodes.push(Node {
271 on_disk_nodes.push(Node {
272 children: write_nodes(&mut node.children, out)?,
272 children: write_nodes(&node.children, out)?,
273 tracked_descendants_count: node.tracked_descendants_count.into(),
273 tracked_descendants_count: node.tracked_descendants_count.into(),
274 full_path: write_slice::<u8>(
274 full_path: write_slice::<u8>(
275 full_path.full_path().as_bytes(),
275 full_path.full_path().as_bytes(),
276 out,
276 out,
277 ),
277 ),
278 base_name_start: u32::try_from(full_path.base_name_start())
278 base_name_start: u32::try_from(full_path.base_name_start())
279 // Could only panic for paths over 4 GiB
279 // Could only panic for paths over 4 GiB
280 .expect("dirstate-v2 offset overflow")
280 .expect("dirstate-v2 offset overflow")
281 .into(),
281 .into(),
282 copy_source: if let Some(source) = &node.copy_source {
282 copy_source: if let Some(source) = &node.copy_source {
283 write_slice::<u8>(source.as_bytes(), out)
283 write_slice::<u8>(source.as_bytes(), out)
284 } else {
284 } else {
285 Slice {
285 Slice {
286 start: 0.into(),
286 start: 0.into(),
287 len: 0.into(),
287 len: 0.into(),
288 }
288 }
289 },
289 },
290 entry: if let Some(entry) = &mut node.entry {
290 entry: if let Some(entry) = &node.entry {
291 OptEntry {
291 OptEntry {
292 state: entry.state.into(),
292 state: entry.state.into(),
293 mode: entry.mode.into(),
293 mode: entry.mode.into(),
294 mtime: entry.mtime.into(),
294 mtime: entry.mtime.into(),
295 size: entry.size.into(),
295 size: entry.size.into(),
296 }
296 }
297 } else {
297 } else {
298 OptEntry {
298 OptEntry {
299 state: b'\0',
299 state: b'\0',
300 mode: 0.into(),
300 mode: 0.into(),
301 mtime: 0.into(),
301 mtime: 0.into(),
302 size: 0.into(),
302 size: 0.into(),
303 }
303 }
304 },
304 },
305 })
305 })
306 }
306 }
307 // … so we can write them contiguously
307 // … so we can write them contiguously
308 Ok(write_slice::<Node>(&on_disk_nodes, out))
308 Ok(write_slice::<Node>(&on_disk_nodes, out))
309 }
309 }
310
310
311 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
311 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
312 where
312 where
313 T: BytesCast,
313 T: BytesCast,
314 {
314 {
315 let start = u64::try_from(out.len())
315 let start = u64::try_from(out.len())
316 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
316 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
317 .expect("dirstate-v2 offset overflow")
317 .expect("dirstate-v2 offset overflow")
318 .into();
318 .into();
319 let len = u32::try_from(slice.len())
319 let len = u32::try_from(slice.len())
320 // Could only panic for paths over 4 GiB or nodes with over 4 billion
320 // Could only panic for paths over 4 GiB or nodes with over 4 billion
321 // child nodes
321 // child nodes
322 .expect("dirstate-v2 offset overflow")
322 .expect("dirstate-v2 offset overflow")
323 .into();
323 .into();
324 out.extend(slice.as_bytes());
324 out.extend(slice.as_bytes());
325 Slice { start, len }
325 Slice { start, len }
326 }
326 }
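A hypothetical, simplified write-side counterpart to `write` and `write_slice` above: data is appended to one output buffer, its position is recorded as a (start, len) pair, and the fixed-size header reserved at the front is patched in once the offsets are known:

```rust
// Append `data` to the output buffer and record where it landed, like
// `write_slice` does with its `Slice { start, len }` return value.
fn write_blob(data: &[u8], out: &mut Vec<u8>) -> (u64, u32) {
    let start = out.len() as u64;
    out.extend_from_slice(data);
    (start, data.len() as u32)
}

fn main() {
    let mut out = Vec::new();
    // Reserve space for a toy 12-byte "header"; fill it in at the end.
    out.resize(12, 0_u8);

    let (start, len) = write_blob(b"some payload", &mut out);

    out[..8].copy_from_slice(&start.to_be_bytes());
    out[8..12].copy_from_slice(&len.to_be_bytes());

    assert_eq!(&out[start as usize..][..len as usize], &b"some payload"[..]);
}
```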
@@ -1,426 +1,426 b''
1 use crate::dirstate::status::IgnoreFnType;
1 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate_tree::dirstate_map::ChildNodes;
2 use crate::dirstate_tree::dirstate_map::ChildNodes;
3 use crate::dirstate_tree::dirstate_map::DirstateMap;
3 use crate::dirstate_tree::dirstate_map::DirstateMap;
4 use crate::dirstate_tree::dirstate_map::Node;
4 use crate::dirstate_tree::dirstate_map::Node;
5 use crate::matchers::get_ignore_function;
5 use crate::matchers::get_ignore_function;
6 use crate::matchers::Matcher;
6 use crate::matchers::Matcher;
7 use crate::utils::files::get_bytes_from_os_string;
7 use crate::utils::files::get_bytes_from_os_string;
8 use crate::utils::hg_path::HgPath;
8 use crate::utils::hg_path::HgPath;
9 use crate::BadMatch;
9 use crate::BadMatch;
10 use crate::DirstateStatus;
10 use crate::DirstateStatus;
11 use crate::EntryState;
11 use crate::EntryState;
12 use crate::HgPathBuf;
12 use crate::HgPathBuf;
13 use crate::PatternFileWarning;
13 use crate::PatternFileWarning;
14 use crate::StatusError;
14 use crate::StatusError;
15 use crate::StatusOptions;
15 use crate::StatusOptions;
16 use micro_timer::timed;
16 use micro_timer::timed;
17 use rayon::prelude::*;
17 use rayon::prelude::*;
18 use std::borrow::Cow;
18 use std::borrow::Cow;
19 use std::io;
19 use std::io;
20 use std::path::Path;
20 use std::path::Path;
21 use std::path::PathBuf;
21 use std::path::PathBuf;
22 use std::sync::Mutex;
22 use std::sync::Mutex;
23
23
24 /// Returns the status of the working directory compared to its parent
24 /// Returns the status of the working directory compared to its parent
25 /// changeset.
25 /// changeset.
26 ///
26 ///
27 /// This algorithm is based on traversing the filesystem tree (`fs` in function
27 /// This algorithm is based on traversing the filesystem tree (`fs` in function
28 /// and variable names) and dirstate tree at the same time. The core of this
28 /// and variable names) and dirstate tree at the same time. The core of this
29 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
29 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
30 /// and its use of `itertools::merge_join_by`. When reaching a path that only
30 /// and its use of `itertools::merge_join_by`. When reaching a path that only
31 /// exists in one of the two trees, depending on information requested by
31 /// exists in one of the two trees, depending on information requested by
32 /// `options` we may need to traverse the remaining subtree.
32 /// `options` we may need to traverse the remaining subtree.
33 #[timed]
33 #[timed]
34 pub fn status<'tree>(
34 pub fn status<'tree>(
35 dmap: &'tree mut DirstateMap,
35 dmap: &'tree mut DirstateMap,
36 matcher: &(dyn Matcher + Sync),
36 matcher: &(dyn Matcher + Sync),
37 root_dir: PathBuf,
37 root_dir: PathBuf,
38 ignore_files: Vec<PathBuf>,
38 ignore_files: Vec<PathBuf>,
39 options: StatusOptions,
39 options: StatusOptions,
40 ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
40 ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
41 let (ignore_fn, warnings): (IgnoreFnType, _) =
41 let (ignore_fn, warnings): (IgnoreFnType, _) =
42 if options.list_ignored || options.list_unknown {
42 if options.list_ignored || options.list_unknown {
43 get_ignore_function(ignore_files, &root_dir)?
43 get_ignore_function(ignore_files, &root_dir)?
44 } else {
44 } else {
45 (Box::new(|&_| true), vec![])
45 (Box::new(|&_| true), vec![])
46 };
46 };
47
47
48 let common = StatusCommon {
48 let common = StatusCommon {
49 options,
49 options,
50 matcher,
50 matcher,
51 ignore_fn,
51 ignore_fn,
52 outcome: Mutex::new(DirstateStatus::default()),
52 outcome: Mutex::new(DirstateStatus::default()),
53 };
53 };
54 let is_at_repo_root = true;
54 let is_at_repo_root = true;
55 let hg_path = HgPath::new("");
55 let hg_path = HgPath::new("");
56 let has_ignored_ancestor = false;
56 let has_ignored_ancestor = false;
57 common.traverse_fs_directory_and_dirstate(
57 common.traverse_fs_directory_and_dirstate(
58 has_ignored_ancestor,
58 has_ignored_ancestor,
59 &mut dmap.root,
59 &dmap.root,
60 hg_path,
60 hg_path,
61 &root_dir,
61 &root_dir,
62 is_at_repo_root,
62 is_at_repo_root,
63 );
63 );
64 Ok((common.outcome.into_inner().unwrap(), warnings))
64 Ok((common.outcome.into_inner().unwrap(), warnings))
65 }
65 }
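
Illustrative aside (hypothetical file names, not from the changeset): the pairing that the doc comment above describes can be reduced to a few lines. merge_join_by walks two sorted sequences in lockstep and yields Both, Left, or Right for each key, which is how shared, dirstate-only, and filesystem-only paths are told apart.

// Minimal sketch of the merge used by the traversal: both inputs are sorted.
use itertools::EitherOrBoth::{Both, Left, Right};

fn main() {
    let dirstate = vec!["a.rs", "b.rs", "d.rs"]; // tracked in the dirstate
    let filesystem = vec!["a.rs", "c.rs", "d.rs"]; // found on disk
    for pair in itertools::merge_join_by(dirstate, filesystem, |d, f| d.cmp(f)) {
        match pair {
            Both(name, _) => println!("{}: in dirstate and on disk", name),
            Left(name) => println!("{}: dirstate only (removed or deleted)", name),
            Right(name) => println!("{}: filesystem only (unknown or ignored)", name),
        }
    }
}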
66
66
67 /// Bag of random things needed by various parts of the algorithm. Reduces the
67 /// Bag of random things needed by various parts of the algorithm. Reduces the
68 /// number of parameters passed to functions.
68 /// number of parameters passed to functions.
69 struct StatusCommon<'tree, 'a> {
69 struct StatusCommon<'tree, 'a> {
70 options: StatusOptions,
70 options: StatusOptions,
71 matcher: &'a (dyn Matcher + Sync),
71 matcher: &'a (dyn Matcher + Sync),
72 ignore_fn: IgnoreFnType<'a>,
72 ignore_fn: IgnoreFnType<'a>,
73 outcome: Mutex<DirstateStatus<'tree>>,
73 outcome: Mutex<DirstateStatus<'tree>>,
74 }
74 }
75
75
76 impl<'tree, 'a> StatusCommon<'tree, 'a> {
76 impl<'tree, 'a> StatusCommon<'tree, 'a> {
77 fn read_dir(
77 fn read_dir(
78 &self,
78 &self,
79 hg_path: &HgPath,
79 hg_path: &HgPath,
80 fs_path: &Path,
80 fs_path: &Path,
81 is_at_repo_root: bool,
81 is_at_repo_root: bool,
82 ) -> Result<Vec<DirEntry>, ()> {
82 ) -> Result<Vec<DirEntry>, ()> {
83 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
83 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
84 let errno = error.raw_os_error().expect("expected real OS error");
84 let errno = error.raw_os_error().expect("expected real OS error");
85 self.outcome
85 self.outcome
86 .lock()
86 .lock()
87 .unwrap()
87 .unwrap()
88 .bad
88 .bad
89 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
89 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
90 })
90 })
91 }
91 }
92
92
93 fn traverse_fs_directory_and_dirstate(
93 fn traverse_fs_directory_and_dirstate(
94 &self,
94 &self,
95 has_ignored_ancestor: bool,
95 has_ignored_ancestor: bool,
96 dirstate_nodes: &'tree mut ChildNodes,
96 dirstate_nodes: &'tree ChildNodes,
97 directory_hg_path: &'tree HgPath,
97 directory_hg_path: &'tree HgPath,
98 directory_fs_path: &Path,
98 directory_fs_path: &Path,
99 is_at_repo_root: bool,
99 is_at_repo_root: bool,
100 ) {
100 ) {
101 let mut fs_entries = if let Ok(entries) = self.read_dir(
101 let mut fs_entries = if let Ok(entries) = self.read_dir(
102 directory_hg_path,
102 directory_hg_path,
103 directory_fs_path,
103 directory_fs_path,
104 is_at_repo_root,
104 is_at_repo_root,
105 ) {
105 ) {
106 entries
106 entries
107 } else {
107 } else {
108 return;
108 return;
109 };
109 };
110
110
111 // `merge_join_by` requires both its input iterators to be sorted:
111 // `merge_join_by` requires both its input iterators to be sorted:
112
112
113 let dirstate_nodes = Node::sorted(dirstate_nodes);
113 let dirstate_nodes = Node::sorted(dirstate_nodes);
114 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
114 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
115 // https://github.com/rust-lang/rust/issues/34162
115 // https://github.com/rust-lang/rust/issues/34162
116 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
116 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
117
117
118 itertools::merge_join_by(
118 itertools::merge_join_by(
119 dirstate_nodes,
119 dirstate_nodes,
120 &fs_entries,
120 &fs_entries,
121 |(full_path, _node), fs_entry| {
121 |(full_path, _node), fs_entry| {
122 full_path.base_name().cmp(&fs_entry.base_name)
122 full_path.base_name().cmp(&fs_entry.base_name)
123 },
123 },
124 )
124 )
125 .par_bridge()
125 .par_bridge()
126 .for_each(|pair| {
126 .for_each(|pair| {
127 use itertools::EitherOrBoth::*;
127 use itertools::EitherOrBoth::*;
128 match pair {
128 match pair {
129 Both((hg_path, dirstate_node), fs_entry) => {
129 Both((hg_path, dirstate_node), fs_entry) => {
130 self.traverse_fs_and_dirstate(
130 self.traverse_fs_and_dirstate(
131 fs_entry,
131 fs_entry,
132 hg_path.full_path(),
132 hg_path.full_path(),
133 dirstate_node,
133 dirstate_node,
134 has_ignored_ancestor,
134 has_ignored_ancestor,
135 );
135 );
136 }
136 }
137 Left((hg_path, dirstate_node)) => self.traverse_dirstate_only(
137 Left((hg_path, dirstate_node)) => self.traverse_dirstate_only(
138 hg_path.full_path(),
138 hg_path.full_path(),
139 dirstate_node,
139 dirstate_node,
140 ),
140 ),
141 Right(fs_entry) => self.traverse_fs_only(
141 Right(fs_entry) => self.traverse_fs_only(
142 has_ignored_ancestor,
142 has_ignored_ancestor,
143 directory_hg_path,
143 directory_hg_path,
144 fs_entry,
144 fs_entry,
145 ),
145 ),
146 }
146 }
147 })
147 })
148 }
148 }
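
Side note on the sorting detail referenced above (rust-lang/rust#34162): sort_unstable_by_key cannot return a key that borrows from the element, so the comparator form is used. A minimal sketch with a hypothetical simplified entry type:

struct FsEntry {
    base_name: String, // stand-in for DirEntry::base_name
}

fn main() {
    let mut fs_entries = vec![
        FsEntry { base_name: "b".to_owned() },
        FsEntry { base_name: "a".to_owned() },
    ];
    // Does not compile: the key would borrow from the closure argument.
    // fs_entries.sort_unstable_by_key(|e| &e.base_name);
    fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
    assert_eq!(fs_entries[0].base_name, "a");
}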
149
149
150 fn traverse_fs_and_dirstate(
150 fn traverse_fs_and_dirstate(
151 &self,
151 &self,
152 fs_entry: &DirEntry,
152 fs_entry: &DirEntry,
153 hg_path: &'tree HgPath,
153 hg_path: &'tree HgPath,
154 dirstate_node: &'tree mut Node,
154 dirstate_node: &'tree Node,
155 has_ignored_ancestor: bool,
155 has_ignored_ancestor: bool,
156 ) {
156 ) {
157 let file_type = fs_entry.metadata.file_type();
157 let file_type = fs_entry.metadata.file_type();
158 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
158 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
159 if !file_or_symlink {
159 if !file_or_symlink {
160 // If we previously had a file here, it was removed (with
160 // If we previously had a file here, it was removed (with
161 // `hg rm` or similar) or deleted before it could be
161 // `hg rm` or similar) or deleted before it could be
162 // replaced by a directory or something else.
162 // replaced by a directory or something else.
163 self.mark_removed_or_deleted_if_file(
163 self.mark_removed_or_deleted_if_file(
164 hg_path,
164 hg_path,
165 dirstate_node.state(),
165 dirstate_node.state(),
166 );
166 );
167 }
167 }
168 if file_type.is_dir() {
168 if file_type.is_dir() {
169 if self.options.collect_traversed_dirs {
169 if self.options.collect_traversed_dirs {
170 self.outcome.lock().unwrap().traversed.push(hg_path.into())
170 self.outcome.lock().unwrap().traversed.push(hg_path.into())
171 }
171 }
172 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
172 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
173 let is_at_repo_root = false;
173 let is_at_repo_root = false;
174 self.traverse_fs_directory_and_dirstate(
174 self.traverse_fs_directory_and_dirstate(
175 is_ignored,
175 is_ignored,
176 &mut dirstate_node.children,
176 &dirstate_node.children,
177 hg_path,
177 hg_path,
178 &fs_entry.full_path,
178 &fs_entry.full_path,
179 is_at_repo_root,
179 is_at_repo_root,
180 );
180 );
181 } else {
181 } else {
182 if file_or_symlink && self.matcher.matches(hg_path) {
182 if file_or_symlink && self.matcher.matches(hg_path) {
183 let full_path = Cow::from(hg_path);
183 let full_path = Cow::from(hg_path);
184 if let Some(entry) = &dirstate_node.entry {
184 if let Some(entry) = &dirstate_node.entry {
185 match entry.state {
185 match entry.state {
186 EntryState::Added => {
186 EntryState::Added => {
187 self.outcome.lock().unwrap().added.push(full_path)
187 self.outcome.lock().unwrap().added.push(full_path)
188 }
188 }
189 EntryState::Removed => self
189 EntryState::Removed => self
190 .outcome
190 .outcome
191 .lock()
191 .lock()
192 .unwrap()
192 .unwrap()
193 .removed
193 .removed
194 .push(full_path),
194 .push(full_path),
195 EntryState::Merged => self
195 EntryState::Merged => self
196 .outcome
196 .outcome
197 .lock()
197 .lock()
198 .unwrap()
198 .unwrap()
199 .modified
199 .modified
200 .push(full_path),
200 .push(full_path),
201 EntryState::Normal => {
201 EntryState::Normal => {
202 self.handle_normal_file(
202 self.handle_normal_file(
203 full_path,
203 full_path,
204 dirstate_node,
204 dirstate_node,
205 entry,
205 entry,
206 fs_entry,
206 fs_entry,
207 );
207 );
208 }
208 }
209 // This variant is not used in DirstateMap
209 // This variant is not used in DirstateMap
210 // nodes
210 // nodes
211 EntryState::Unknown => unreachable!(),
211 EntryState::Unknown => unreachable!(),
212 }
212 }
213 } else {
213 } else {
214 // `node.entry.is_none()` indicates a "directory"
214 // `node.entry.is_none()` indicates a "directory"
215 // node, but the filesystem has a file
215 // node, but the filesystem has a file
216 self.mark_unknown_or_ignored(
216 self.mark_unknown_or_ignored(
217 has_ignored_ancestor,
217 has_ignored_ancestor,
218 full_path,
218 full_path,
219 )
219 )
220 }
220 }
221 }
221 }
222
222
223 for (child_hg_path, child_node) in &mut dirstate_node.children {
223 for (child_hg_path, child_node) in &dirstate_node.children {
224 self.traverse_dirstate_only(
224 self.traverse_dirstate_only(
225 child_hg_path.full_path(),
225 child_hg_path.full_path(),
226 child_node,
226 child_node,
227 )
227 )
228 }
228 }
229 }
229 }
230 }
230 }
231
231
232 /// A file with `EntryState::Normal` in the dirstate was found in the
232 /// A file with `EntryState::Normal` in the dirstate was found in the
233 /// filesystem
233 /// filesystem
234 fn handle_normal_file(
234 fn handle_normal_file(
235 &self,
235 &self,
236 full_path: Cow<'tree, HgPath>,
236 full_path: Cow<'tree, HgPath>,
237 dirstate_node: &Node,
237 dirstate_node: &Node,
238 entry: &crate::DirstateEntry,
238 entry: &crate::DirstateEntry,
239 fs_entry: &DirEntry,
239 fs_entry: &DirEntry,
240 ) {
240 ) {
241 // Keep the low 31 bits
241 // Keep the low 31 bits
242 fn truncate_u64(value: u64) -> i32 {
242 fn truncate_u64(value: u64) -> i32 {
243 (value & 0x7FFF_FFFF) as i32
243 (value & 0x7FFF_FFFF) as i32
244 }
244 }
245 fn truncate_i64(value: i64) -> i32 {
245 fn truncate_i64(value: i64) -> i32 {
246 (value & 0x7FFF_FFFF) as i32
246 (value & 0x7FFF_FFFF) as i32
247 }
247 }
248
248
249 let mode_changed = || {
249 let mode_changed = || {
250 self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
250 self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
251 };
251 };
252 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
252 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
253 if entry.size >= 0
253 if entry.size >= 0
254 && size_changed
254 && size_changed
255 && fs_entry.metadata.file_type().is_symlink()
255 && fs_entry.metadata.file_type().is_symlink()
256 {
256 {
257 // issue6456: Size returned may be longer due to encryption
257 // issue6456: Size returned may be longer due to encryption
258 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
258 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
259 self.outcome.lock().unwrap().unsure.push(full_path)
259 self.outcome.lock().unwrap().unsure.push(full_path)
260 } else if dirstate_node.copy_source.is_some()
260 } else if dirstate_node.copy_source.is_some()
261 || entry.is_from_other_parent()
261 || entry.is_from_other_parent()
262 || (entry.size >= 0 && (size_changed || mode_changed()))
262 || (entry.size >= 0 && (size_changed || mode_changed()))
263 {
263 {
264 self.outcome.lock().unwrap().modified.push(full_path)
264 self.outcome.lock().unwrap().modified.push(full_path)
265 } else {
265 } else {
266 let mtime = mtime_seconds(&fs_entry.metadata);
266 let mtime = mtime_seconds(&fs_entry.metadata);
267 if truncate_i64(mtime) != entry.mtime
267 if truncate_i64(mtime) != entry.mtime
268 || mtime == self.options.last_normal_time
268 || mtime == self.options.last_normal_time
269 {
269 {
270 self.outcome.lock().unwrap().unsure.push(full_path)
270 self.outcome.lock().unwrap().unsure.push(full_path)
271 } else if self.options.list_clean {
271 } else if self.options.list_clean {
272 self.outcome.lock().unwrap().clean.push(full_path)
272 self.outcome.lock().unwrap().clean.push(full_path)
273 }
273 }
274 }
274 }
275 }
275 }
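
Worked values for the truncation helpers above, restated as a standalone check: they keep only the low 31 bits of the filesystem size and mtime before comparing them with the i32 fields of the dirstate entry.

fn truncate_u64(value: u64) -> i32 {
    (value & 0x7FFF_FFFF) as i32
}
fn truncate_i64(value: i64) -> i32 {
    (value & 0x7FFF_FFFF) as i32
}

fn main() {
    assert_eq!(truncate_u64(5), 5);
    assert_eq!(truncate_u64(0x8000_0005), 5); // bit 31 and above discarded
    assert_eq!(truncate_i64(-1), 0x7FFF_FFFF); // sign bits discarded as well
}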
276
276
277 /// A node in the dirstate tree has no corresponding filesystem entry
277 /// A node in the dirstate tree has no corresponding filesystem entry
278 fn traverse_dirstate_only(
278 fn traverse_dirstate_only(
279 &self,
279 &self,
280 hg_path: &'tree HgPath,
280 hg_path: &'tree HgPath,
281 dirstate_node: &'tree mut Node,
281 dirstate_node: &'tree Node,
282 ) {
282 ) {
283 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state());
283 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state());
284 dirstate_node.children.par_iter_mut().for_each(
284 dirstate_node.children.par_iter().for_each(
285 |(child_hg_path, child_node)| {
285 |(child_hg_path, child_node)| {
286 self.traverse_dirstate_only(
286 self.traverse_dirstate_only(
287 child_hg_path.full_path(),
287 child_hg_path.full_path(),
288 child_node,
288 child_node,
289 )
289 )
290 },
290 },
291 )
291 )
292 }
292 }
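
Aside on the pattern this changeset settles on (simplified types, not the real ChildNodes): the traversal only reads nodes and pushes results through the Mutex-guarded outcome, so shared references and par_iter() suffice and no &mut access is needed.

use rayon::prelude::*;
use std::collections::HashMap;
use std::sync::Mutex;

fn main() {
    // Stand-ins: a child map and a result list guarded by a Mutex.
    let children: HashMap<String, u32> =
        [("a".to_owned(), 1), ("b".to_owned(), 2)].into_iter().collect();
    let deleted: Mutex<Vec<String>> = Mutex::new(Vec::new());
    children.par_iter().for_each(|(name, _node)| {
        deleted.lock().unwrap().push(name.clone());
    });
    assert_eq!(deleted.into_inner().unwrap().len(), 2);
}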
293
293
294 /// A node in the dirstate tree has no corresponding *file* on the
294 /// A node in the dirstate tree has no corresponding *file* on the
295 /// filesystem
295 /// filesystem
296 ///
296 ///
297 /// Does nothing on a "directory" node
297 /// Does nothing on a "directory" node
298 fn mark_removed_or_deleted_if_file(
298 fn mark_removed_or_deleted_if_file(
299 &self,
299 &self,
300 hg_path: &'tree HgPath,
300 hg_path: &'tree HgPath,
301 dirstate_node_state: Option<EntryState>,
301 dirstate_node_state: Option<EntryState>,
302 ) {
302 ) {
303 if let Some(state) = dirstate_node_state {
303 if let Some(state) = dirstate_node_state {
304 if self.matcher.matches(hg_path) {
304 if self.matcher.matches(hg_path) {
305 if let EntryState::Removed = state {
305 if let EntryState::Removed = state {
306 self.outcome.lock().unwrap().removed.push(hg_path.into())
306 self.outcome.lock().unwrap().removed.push(hg_path.into())
307 } else {
307 } else {
308 self.outcome.lock().unwrap().deleted.push(hg_path.into())
308 self.outcome.lock().unwrap().deleted.push(hg_path.into())
309 }
309 }
310 }
310 }
311 }
311 }
312 }
312 }
313
313
314 /// Something in the filesystem has no corresponding dirstate node
314 /// Something in the filesystem has no corresponding dirstate node
315 fn traverse_fs_only(
315 fn traverse_fs_only(
316 &self,
316 &self,
317 has_ignored_ancestor: bool,
317 has_ignored_ancestor: bool,
318 directory_hg_path: &HgPath,
318 directory_hg_path: &HgPath,
319 fs_entry: &DirEntry,
319 fs_entry: &DirEntry,
320 ) {
320 ) {
321 let hg_path = directory_hg_path.join(&fs_entry.base_name);
321 let hg_path = directory_hg_path.join(&fs_entry.base_name);
322 let file_type = fs_entry.metadata.file_type();
322 let file_type = fs_entry.metadata.file_type();
323 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
323 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
324 if file_type.is_dir() {
324 if file_type.is_dir() {
325 let is_ignored =
325 let is_ignored =
326 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
326 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
327 let traverse_children = if is_ignored {
327 let traverse_children = if is_ignored {
328 // Descendants of an ignored directory are all ignored
328 // Descendants of an ignored directory are all ignored
329 self.options.list_ignored
329 self.options.list_ignored
330 } else {
330 } else {
331 // Descendants of an unknown directory may be either unknown or
331 // Descendants of an unknown directory may be either unknown or
332 // ignored
332 // ignored
333 self.options.list_unknown || self.options.list_ignored
333 self.options.list_unknown || self.options.list_ignored
334 };
334 };
335 if traverse_children {
335 if traverse_children {
336 let is_at_repo_root = false;
336 let is_at_repo_root = false;
337 if let Ok(children_fs_entries) = self.read_dir(
337 if let Ok(children_fs_entries) = self.read_dir(
338 &hg_path,
338 &hg_path,
339 &fs_entry.full_path,
339 &fs_entry.full_path,
340 is_at_repo_root,
340 is_at_repo_root,
341 ) {
341 ) {
342 children_fs_entries.par_iter().for_each(|child_fs_entry| {
342 children_fs_entries.par_iter().for_each(|child_fs_entry| {
343 self.traverse_fs_only(
343 self.traverse_fs_only(
344 is_ignored,
344 is_ignored,
345 &hg_path,
345 &hg_path,
346 child_fs_entry,
346 child_fs_entry,
347 )
347 )
348 })
348 })
349 }
349 }
350 }
350 }
351 if self.options.collect_traversed_dirs {
351 if self.options.collect_traversed_dirs {
352 self.outcome.lock().unwrap().traversed.push(hg_path.into())
352 self.outcome.lock().unwrap().traversed.push(hg_path.into())
353 }
353 }
354 } else if file_or_symlink && self.matcher.matches(&hg_path) {
354 } else if file_or_symlink && self.matcher.matches(&hg_path) {
355 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
355 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
356 }
356 }
357 }
357 }
358
358
359 fn mark_unknown_or_ignored(
359 fn mark_unknown_or_ignored(
360 &self,
360 &self,
361 has_ignored_ancestor: bool,
361 has_ignored_ancestor: bool,
362 hg_path: Cow<'tree, HgPath>,
362 hg_path: Cow<'tree, HgPath>,
363 ) {
363 ) {
364 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
364 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
365 if is_ignored {
365 if is_ignored {
366 if self.options.list_ignored {
366 if self.options.list_ignored {
367 self.outcome.lock().unwrap().ignored.push(hg_path)
367 self.outcome.lock().unwrap().ignored.push(hg_path)
368 }
368 }
369 } else {
369 } else {
370 if self.options.list_unknown {
370 if self.options.list_unknown {
371 self.outcome.lock().unwrap().unknown.push(hg_path)
371 self.outcome.lock().unwrap().unknown.push(hg_path)
372 }
372 }
373 }
373 }
374 }
374 }
375 }
375 }
376
376
377 #[cfg(unix)] // TODO
377 #[cfg(unix)] // TODO
378 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
378 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
379 // Going through `Metadata::modified()` would be portable, but would take
379 // Going through `Metadata::modified()` would be portable, but would take
380 // care to construct a `SystemTime` value with sub-second precision just
380 // care to construct a `SystemTime` value with sub-second precision just
381 // for us to throw that away here.
381 // for us to throw that away here.
382 use std::os::unix::fs::MetadataExt;
382 use std::os::unix::fs::MetadataExt;
383 metadata.mtime()
383 metadata.mtime()
384 }
384 }
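
For completeness, a hedged sketch of the portable route that the comment above mentions but deliberately avoids (the function name and error handling here are illustrative only):

use std::fs::Metadata;
use std::io;
use std::time::UNIX_EPOCH;

fn mtime_seconds_portable(metadata: &Metadata) -> io::Result<i64> {
    let modified = metadata.modified()?;
    Ok(match modified.duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs() as i64,
        // Timestamps before 1970 come back as an error; negate the offset.
        Err(before_epoch) => -(before_epoch.duration().as_secs() as i64),
    })
}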
385
385
386 struct DirEntry {
386 struct DirEntry {
387 base_name: HgPathBuf,
387 base_name: HgPathBuf,
388 full_path: PathBuf,
388 full_path: PathBuf,
389 metadata: std::fs::Metadata,
389 metadata: std::fs::Metadata,
390 }
390 }
391
391
392 impl DirEntry {
392 impl DirEntry {
393 /// Returns **unsorted** entries in the given directory, with name and
393 /// Returns **unsorted** entries in the given directory, with name and
394 /// metadata.
394 /// metadata.
395 ///
395 ///
396 /// If a `.hg` sub-directory is encountered:
396 /// If a `.hg` sub-directory is encountered:
397 ///
397 ///
398 /// * At the repository root, ignore that sub-directory
398 /// * At the repository root, ignore that sub-directory
399 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
399 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
400 /// list instead.
400 /// list instead.
401 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
401 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
402 let mut results = Vec::new();
402 let mut results = Vec::new();
403 for entry in path.read_dir()? {
403 for entry in path.read_dir()? {
404 let entry = entry?;
404 let entry = entry?;
405 let metadata = entry.metadata()?;
405 let metadata = entry.metadata()?;
406 let name = get_bytes_from_os_string(entry.file_name());
406 let name = get_bytes_from_os_string(entry.file_name());
407 // FIXME don't do this when cached
407 // FIXME don't do this when cached
408 if name == b".hg" {
408 if name == b".hg" {
409 if is_at_repo_root {
409 if is_at_repo_root {
410 // Skip the repo’s own .hg (might be a symlink)
410 // Skip the repo’s own .hg (might be a symlink)
411 continue;
411 continue;
412 } else if metadata.is_dir() {
412 } else if metadata.is_dir() {
413 // A .hg sub-directory at another location means a subrepo,
413 // A .hg sub-directory at another location means a subrepo,
414 // skip it entirely.
414 // skip it entirely.
415 return Ok(Vec::new());
415 return Ok(Vec::new());
416 }
416 }
417 }
417 }
418 results.push(DirEntry {
418 results.push(DirEntry {
419 base_name: name.into(),
419 base_name: name.into(),
420 full_path: entry.path(),
420 full_path: entry.path(),
421 metadata,
421 metadata,
422 })
422 })
423 }
423 }
424 Ok(results)
424 Ok(results)
425 }
425 }
426 }
426 }