##// END OF EJS Templates
dirstate-v2: Change the on-disk format to be tree-shaped...
Simon Sapin -
r48058:2a9ddc80 default
parent child Browse files
Show More
@@ -1,665 +1,655 b''
1 use bytes_cast::BytesCast;
1 use bytes_cast::BytesCast;
2 use micro_timer::timed;
2 use micro_timer::timed;
3 use std::borrow::Cow;
3 use std::borrow::Cow;
4 use std::convert::TryInto;
4 use std::convert::TryInto;
5 use std::path::PathBuf;
5 use std::path::PathBuf;
6
6
7 use super::on_disk::V2_FORMAT_MARKER;
7 use super::on_disk;
8 use super::path_with_basename::WithBasename;
8 use super::path_with_basename::WithBasename;
9 use crate::dirstate::parsers::clear_ambiguous_mtime;
9 use crate::dirstate::parsers::clear_ambiguous_mtime;
10 use crate::dirstate::parsers::pack_entry;
10 use crate::dirstate::parsers::pack_entry;
11 use crate::dirstate::parsers::packed_entry_size;
11 use crate::dirstate::parsers::packed_entry_size;
12 use crate::dirstate::parsers::parse_dirstate_entries;
12 use crate::dirstate::parsers::parse_dirstate_entries;
13 use crate::dirstate::parsers::Timestamp;
13 use crate::dirstate::parsers::Timestamp;
14 use crate::errors::HgError;
15 use crate::matchers::Matcher;
14 use crate::matchers::Matcher;
16 use crate::utils::hg_path::{HgPath, HgPathBuf};
15 use crate::utils::hg_path::{HgPath, HgPathBuf};
17 use crate::utils::SliceExt;
18 use crate::CopyMapIter;
16 use crate::CopyMapIter;
19 use crate::DirstateEntry;
17 use crate::DirstateEntry;
20 use crate::DirstateError;
18 use crate::DirstateError;
21 use crate::DirstateMapError;
19 use crate::DirstateMapError;
22 use crate::DirstateParents;
20 use crate::DirstateParents;
23 use crate::DirstateStatus;
21 use crate::DirstateStatus;
24 use crate::EntryState;
22 use crate::EntryState;
25 use crate::FastHashMap;
23 use crate::FastHashMap;
26 use crate::PatternFileWarning;
24 use crate::PatternFileWarning;
27 use crate::StateMapIter;
25 use crate::StateMapIter;
28 use crate::StatusError;
26 use crate::StatusError;
29 use crate::StatusOptions;
27 use crate::StatusOptions;
30
28
31 pub struct DirstateMap<'on_disk> {
29 pub struct DirstateMap<'on_disk> {
32 /// Contents of the `.hg/dirstate` file
30 /// Contents of the `.hg/dirstate` file
33 on_disk: &'on_disk [u8],
31 pub(super) on_disk: &'on_disk [u8],
34
32
35 pub(super) root: ChildNodes<'on_disk>,
33 pub(super) root: ChildNodes<'on_disk>,
36
34
37 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
35 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
38 nodes_with_entry_count: usize,
36 pub(super) nodes_with_entry_count: u32,
39
37
40 /// Number of nodes anywhere in the tree that have
38 /// Number of nodes anywhere in the tree that have
41 /// `.copy_source.is_some()`.
39 /// `.copy_source.is_some()`.
42 nodes_with_copy_source_count: usize,
40 pub(super) nodes_with_copy_source_count: u32,
43 }
41 }
44
42
45 /// Using a plain `HgPathBuf` of the full path from the repository root as a
43 /// Using a plain `HgPathBuf` of the full path from the repository root as a
46 /// map key would also work: all paths in a given map have the same parent
44 /// map key would also work: all paths in a given map have the same parent
47 /// path, so comparing full paths gives the same result as comparing base
45 /// path, so comparing full paths gives the same result as comparing base
48 /// names. However `HashMap` would waste time always re-hashing the same
46 /// names. However `HashMap` would waste time always re-hashing the same
49 /// string prefix.
47 /// string prefix.
50 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
48 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
51 pub(super) type ChildNodes<'on_disk> =
49 pub(super) type ChildNodes<'on_disk> =
52 FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>;
50 FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>;
53
51
54 /// Represents a file or a directory
52 /// Represents a file or a directory
55 #[derive(Default)]
53 #[derive(Default)]
56 pub(super) struct Node<'on_disk> {
54 pub(super) struct Node<'on_disk> {
57 /// `None` for directories
55 /// `None` for directories
58 pub(super) entry: Option<DirstateEntry>,
56 pub(super) entry: Option<DirstateEntry>,
59
57
60 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
58 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
61
59
62 pub(super) children: ChildNodes<'on_disk>,
60 pub(super) children: ChildNodes<'on_disk>,
63
61
64 /// How many (non-inclusive) descendants of this node are tracked files
62 /// How many (non-inclusive) descendants of this node are tracked files
65 tracked_descendants_count: usize,
63 pub(super) tracked_descendants_count: u32,
66 }
64 }
67
65
68 impl<'on_disk> Node<'on_disk> {
66 impl<'on_disk> Node<'on_disk> {
69 pub(super) fn state(&self) -> Option<EntryState> {
67 pub(super) fn state(&self) -> Option<EntryState> {
70 self.entry.as_ref().map(|entry| entry.state)
68 self.entry.as_ref().map(|entry| entry.state)
71 }
69 }
72
70
73 pub(super) fn sorted<'tree>(
71 pub(super) fn sorted<'tree>(
74 nodes: &'tree mut ChildNodes<'on_disk>,
72 nodes: &'tree mut ChildNodes<'on_disk>,
75 ) -> Vec<(&'tree NodeKey<'on_disk>, &'tree mut Self)> {
73 ) -> Vec<(&'tree NodeKey<'on_disk>, &'tree mut Self)> {
76 let mut vec: Vec<_> = nodes.iter_mut().collect();
74 let mut vec: Vec<_> = nodes.iter_mut().collect();
77 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
75 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
78 // https://github.com/rust-lang/rust/issues/34162
76 // https://github.com/rust-lang/rust/issues/34162
79 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
77 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
80 vec
78 vec
81 }
79 }
82 }
80 }
83
81
84 /// `(full_path, entry, copy_source)`
82 /// `(full_path, entry, copy_source)`
85 type NodeDataMut<'tree, 'on_disk> = (
83 type NodeDataMut<'tree, 'on_disk> = (
86 &'tree HgPath,
84 &'tree HgPath,
87 &'tree mut Option<DirstateEntry>,
85 &'tree mut Option<DirstateEntry>,
88 &'tree mut Option<Cow<'on_disk, HgPath>>,
86 &'tree mut Option<Cow<'on_disk, HgPath>>,
89 );
87 );
90
88
91 impl<'on_disk> DirstateMap<'on_disk> {
89 impl<'on_disk> DirstateMap<'on_disk> {
90 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
91 Self {
92 on_disk,
93 root: ChildNodes::default(),
94 nodes_with_entry_count: 0,
95 nodes_with_copy_source_count: 0,
96 }
97 }
98
92 #[timed]
99 #[timed]
93 pub fn new_v2(
100 pub fn new_v2(
94 on_disk: &'on_disk [u8],
101 on_disk: &'on_disk [u8],
95 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
102 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
96 if let Some(rest) = on_disk.drop_prefix(V2_FORMAT_MARKER) {
103 on_disk::read(on_disk)
97 Self::new_v1(rest)
98 } else if on_disk.is_empty() {
99 Self::new_v1(on_disk)
100 } else {
101 return Err(HgError::corrupted(
102 "missing dirstate-v2 magic number",
103 )
104 .into());
105 }
106 }
104 }
107
105
108 #[timed]
106 #[timed]
109 pub fn new_v1(
107 pub fn new_v1(
110 on_disk: &'on_disk [u8],
108 on_disk: &'on_disk [u8],
111 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
109 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
112 let mut map = Self {
110 let mut map = Self::empty(on_disk);
113 on_disk,
114 root: ChildNodes::default(),
115 nodes_with_entry_count: 0,
116 nodes_with_copy_source_count: 0,
117 };
118 if map.on_disk.is_empty() {
111 if map.on_disk.is_empty() {
119 return Ok((map, None));
112 return Ok((map, None));
120 }
113 }
121
114
122 let parents = parse_dirstate_entries(
115 let parents = parse_dirstate_entries(
123 map.on_disk,
116 map.on_disk,
124 |path, entry, copy_source| {
117 |path, entry, copy_source| {
125 let tracked = entry.state.is_tracked();
118 let tracked = entry.state.is_tracked();
126 let node = Self::get_or_insert_node(
119 let node = Self::get_or_insert_node(
127 &mut map.root,
120 &mut map.root,
128 path,
121 path,
129 WithBasename::to_cow_borrowed,
122 WithBasename::to_cow_borrowed,
130 |ancestor| {
123 |ancestor| {
131 if tracked {
124 if tracked {
132 ancestor.tracked_descendants_count += 1
125 ancestor.tracked_descendants_count += 1
133 }
126 }
134 },
127 },
135 );
128 );
136 assert!(
129 assert!(
137 node.entry.is_none(),
130 node.entry.is_none(),
138 "duplicate dirstate entry in read"
131 "duplicate dirstate entry in read"
139 );
132 );
140 assert!(
133 assert!(
141 node.copy_source.is_none(),
134 node.copy_source.is_none(),
142 "duplicate dirstate entry in read"
135 "duplicate dirstate entry in read"
143 );
136 );
144 node.entry = Some(*entry);
137 node.entry = Some(*entry);
145 node.copy_source = copy_source.map(Cow::Borrowed);
138 node.copy_source = copy_source.map(Cow::Borrowed);
146 map.nodes_with_entry_count += 1;
139 map.nodes_with_entry_count += 1;
147 if copy_source.is_some() {
140 if copy_source.is_some() {
148 map.nodes_with_copy_source_count += 1
141 map.nodes_with_copy_source_count += 1
149 }
142 }
150 },
143 },
151 )?;
144 )?;
152 let parents = Some(parents.clone());
145 let parents = Some(parents.clone());
153
146
154 Ok((map, parents))
147 Ok((map, parents))
155 }
148 }
156
149
157 fn get_node(&self, path: &HgPath) -> Option<&Node> {
150 fn get_node(&self, path: &HgPath) -> Option<&Node> {
158 let mut children = &self.root;
151 let mut children = &self.root;
159 let mut components = path.components();
152 let mut components = path.components();
160 let mut component =
153 let mut component =
161 components.next().expect("expected at least one components");
154 components.next().expect("expected at least one components");
162 loop {
155 loop {
163 let child = children.get(component)?;
156 let child = children.get(component)?;
164 if let Some(next_component) = components.next() {
157 if let Some(next_component) = components.next() {
165 component = next_component;
158 component = next_component;
166 children = &child.children;
159 children = &child.children;
167 } else {
160 } else {
168 return Some(child);
161 return Some(child);
169 }
162 }
170 }
163 }
171 }
164 }
172
165
173 /// Returns a mutable reference to the node at `path` if it exists
166 /// Returns a mutable reference to the node at `path` if it exists
174 ///
167 ///
175 /// This takes `root` instead of `&mut self` so that callers can mutate
168 /// This takes `root` instead of `&mut self` so that callers can mutate
176 /// other fields while the returned borrow is still valid
169 /// other fields while the returned borrow is still valid
177 fn get_node_mut<'tree>(
170 fn get_node_mut<'tree>(
178 root: &'tree mut ChildNodes<'on_disk>,
171 root: &'tree mut ChildNodes<'on_disk>,
179 path: &HgPath,
172 path: &HgPath,
180 ) -> Option<&'tree mut Node<'on_disk>> {
173 ) -> Option<&'tree mut Node<'on_disk>> {
181 let mut children = root;
174 let mut children = root;
182 let mut components = path.components();
175 let mut components = path.components();
183 let mut component =
176 let mut component =
184 components.next().expect("expected at least one components");
177 components.next().expect("expected at least one components");
185 loop {
178 loop {
186 let child = children.get_mut(component)?;
179 let child = children.get_mut(component)?;
187 if let Some(next_component) = components.next() {
180 if let Some(next_component) = components.next() {
188 component = next_component;
181 component = next_component;
189 children = &mut child.children;
182 children = &mut child.children;
190 } else {
183 } else {
191 return Some(child);
184 return Some(child);
192 }
185 }
193 }
186 }
194 }
187 }
195
188
196 fn get_or_insert_node<'tree, 'path>(
189 fn get_or_insert_node<'tree, 'path>(
197 root: &'tree mut ChildNodes<'on_disk>,
190 root: &'tree mut ChildNodes<'on_disk>,
198 path: &'path HgPath,
191 path: &'path HgPath,
199 to_cow: impl Fn(
192 to_cow: impl Fn(
200 WithBasename<&'path HgPath>,
193 WithBasename<&'path HgPath>,
201 ) -> WithBasename<Cow<'on_disk, HgPath>>,
194 ) -> WithBasename<Cow<'on_disk, HgPath>>,
202 mut each_ancestor: impl FnMut(&mut Node),
195 mut each_ancestor: impl FnMut(&mut Node),
203 ) -> &'tree mut Node<'on_disk> {
196 ) -> &'tree mut Node<'on_disk> {
204 let mut child_nodes = root;
197 let mut child_nodes = root;
205 let mut inclusive_ancestor_paths =
198 let mut inclusive_ancestor_paths =
206 WithBasename::inclusive_ancestors_of(path);
199 WithBasename::inclusive_ancestors_of(path);
207 let mut ancestor_path = inclusive_ancestor_paths
200 let mut ancestor_path = inclusive_ancestor_paths
208 .next()
201 .next()
209 .expect("expected at least one inclusive ancestor");
202 .expect("expected at least one inclusive ancestor");
210 loop {
203 loop {
211 // TODO: can we avoid allocating an owned key in cases where the
204 // TODO: can we avoid allocating an owned key in cases where the
212 // map already contains that key, without introducing double
205 // map already contains that key, without introducing double
213 // lookup?
206 // lookup?
214 let child_node =
207 let child_node =
215 child_nodes.entry(to_cow(ancestor_path)).or_default();
208 child_nodes.entry(to_cow(ancestor_path)).or_default();
216 if let Some(next) = inclusive_ancestor_paths.next() {
209 if let Some(next) = inclusive_ancestor_paths.next() {
217 each_ancestor(child_node);
210 each_ancestor(child_node);
218 ancestor_path = next;
211 ancestor_path = next;
219 child_nodes = &mut child_node.children;
212 child_nodes = &mut child_node.children;
220 } else {
213 } else {
221 return child_node;
214 return child_node;
222 }
215 }
223 }
216 }
224 }
217 }
225
218
226 fn add_or_remove_file(
219 fn add_or_remove_file(
227 &mut self,
220 &mut self,
228 path: &HgPath,
221 path: &HgPath,
229 old_state: EntryState,
222 old_state: EntryState,
230 new_entry: DirstateEntry,
223 new_entry: DirstateEntry,
231 ) {
224 ) {
232 let tracked_count_increment =
225 let tracked_count_increment =
233 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
226 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
234 (false, true) => 1,
227 (false, true) => 1,
235 (true, false) => -1,
228 (true, false) => -1,
236 _ => 0,
229 _ => 0,
237 };
230 };
238
231
239 let node = Self::get_or_insert_node(
232 let node = Self::get_or_insert_node(
240 &mut self.root,
233 &mut self.root,
241 path,
234 path,
242 WithBasename::to_cow_owned,
235 WithBasename::to_cow_owned,
243 |ancestor| {
236 |ancestor| {
244 // We can’t use `+= increment` because the counter is unsigned,
237 // We can’t use `+= increment` because the counter is unsigned,
245 // and we want debug builds to detect accidental underflow
238 // and we want debug builds to detect accidental underflow
246 // through zero
239 // through zero
247 match tracked_count_increment {
240 match tracked_count_increment {
248 1 => ancestor.tracked_descendants_count += 1,
241 1 => ancestor.tracked_descendants_count += 1,
249 -1 => ancestor.tracked_descendants_count -= 1,
242 -1 => ancestor.tracked_descendants_count -= 1,
250 _ => {}
243 _ => {}
251 }
244 }
252 },
245 },
253 );
246 );
254 if node.entry.is_none() {
247 if node.entry.is_none() {
255 self.nodes_with_entry_count += 1
248 self.nodes_with_entry_count += 1
256 }
249 }
257 node.entry = Some(new_entry)
250 node.entry = Some(new_entry)
258 }
251 }
259
252
260 fn iter_nodes<'a>(
253 fn iter_nodes<'a>(
261 &'a self,
254 &'a self,
262 ) -> impl Iterator<Item = (&'a HgPath, &'a Node)> + 'a {
255 ) -> impl Iterator<Item = (&'a HgPath, &'a Node)> + 'a {
263 // Depth first tree traversal.
256 // Depth first tree traversal.
264 //
257 //
265 // If we could afford internal iteration and recursion,
258 // If we could afford internal iteration and recursion,
266 // this would look like:
259 // this would look like:
267 //
260 //
268 // ```
261 // ```
269 // fn traverse_children(
262 // fn traverse_children(
270 // children: &ChildNodes,
263 // children: &ChildNodes,
271 // each: &mut impl FnMut(&Node),
264 // each: &mut impl FnMut(&Node),
272 // ) {
265 // ) {
273 // for child in children.values() {
266 // for child in children.values() {
274 // traverse_children(&child.children, each);
267 // traverse_children(&child.children, each);
275 // each(child);
268 // each(child);
276 // }
269 // }
277 // }
270 // }
278 // ```
271 // ```
279 //
272 //
280 // However we want an external iterator and therefore can’t use the
273 // However we want an external iterator and therefore can’t use the
281 // call stack. Use an explicit stack instead:
274 // call stack. Use an explicit stack instead:
282 let mut stack = Vec::new();
275 let mut stack = Vec::new();
283 let mut iter = self.root.iter();
276 let mut iter = self.root.iter();
284 std::iter::from_fn(move || {
277 std::iter::from_fn(move || {
285 while let Some((key, child_node)) = iter.next() {
278 while let Some((key, child_node)) = iter.next() {
286 // Pseudo-recursion
279 // Pseudo-recursion
287 let new_iter = child_node.children.iter();
280 let new_iter = child_node.children.iter();
288 let old_iter = std::mem::replace(&mut iter, new_iter);
281 let old_iter = std::mem::replace(&mut iter, new_iter);
289 let key = &**key.full_path();
282 let key = &**key.full_path();
290 stack.push((key, child_node, old_iter));
283 stack.push((key, child_node, old_iter));
291 }
284 }
292 // Found the end of a `children.iter()` iterator.
285 // Found the end of a `children.iter()` iterator.
293 if let Some((key, child_node, next_iter)) = stack.pop() {
286 if let Some((key, child_node, next_iter)) = stack.pop() {
294 // "Return" from pseudo-recursion by restoring state from the
287 // "Return" from pseudo-recursion by restoring state from the
295 // explicit stack
288 // explicit stack
296 iter = next_iter;
289 iter = next_iter;
297
290
298 Some((key, child_node))
291 Some((key, child_node))
299 } else {
292 } else {
300 // Reached the bottom of the stack, we’re done
293 // Reached the bottom of the stack, we’re done
301 None
294 None
302 }
295 }
303 })
296 })
304 }
297 }
305
298
306 /// Mutable iterator for the `(entry, copy source)` of each node.
299 /// Mutable iterator for the `(entry, copy source)` of each node.
307 ///
300 ///
308 /// It would not be safe to yield mutable references to nodes themeselves
301 /// It would not be safe to yield mutable references to nodes themeselves
309 /// with `-> impl Iterator<Item = &mut Node>` since child nodes are
302 /// with `-> impl Iterator<Item = &mut Node>` since child nodes are
310 /// reachable from their ancestor nodes, potentially creating multiple
303 /// reachable from their ancestor nodes, potentially creating multiple
311 /// `&mut` references to a given node.
304 /// `&mut` references to a given node.
312 fn iter_node_data_mut<'tree>(
305 fn iter_node_data_mut<'tree>(
313 &'tree mut self,
306 &'tree mut self,
314 ) -> impl Iterator<Item = NodeDataMut<'tree, 'on_disk>> + 'tree {
307 ) -> impl Iterator<Item = NodeDataMut<'tree, 'on_disk>> + 'tree {
315 // Explict stack for pseudo-recursion, see `iter_nodes` above.
308 // Explict stack for pseudo-recursion, see `iter_nodes` above.
316 let mut stack = Vec::new();
309 let mut stack = Vec::new();
317 let mut iter = self.root.iter_mut();
310 let mut iter = self.root.iter_mut();
318 std::iter::from_fn(move || {
311 std::iter::from_fn(move || {
319 while let Some((key, child_node)) = iter.next() {
312 while let Some((key, child_node)) = iter.next() {
320 // Pseudo-recursion
313 // Pseudo-recursion
321 let data = (
314 let data = (
322 &**key.full_path(),
315 &**key.full_path(),
323 &mut child_node.entry,
316 &mut child_node.entry,
324 &mut child_node.copy_source,
317 &mut child_node.copy_source,
325 );
318 );
326 let new_iter = child_node.children.iter_mut();
319 let new_iter = child_node.children.iter_mut();
327 let old_iter = std::mem::replace(&mut iter, new_iter);
320 let old_iter = std::mem::replace(&mut iter, new_iter);
328 stack.push((data, old_iter));
321 stack.push((data, old_iter));
329 }
322 }
330 // Found the end of a `children.values_mut()` iterator.
323 // Found the end of a `children.values_mut()` iterator.
331 if let Some((data, next_iter)) = stack.pop() {
324 if let Some((data, next_iter)) = stack.pop() {
332 // "Return" from pseudo-recursion by restoring state from the
325 // "Return" from pseudo-recursion by restoring state from the
333 // explicit stack
326 // explicit stack
334 iter = next_iter;
327 iter = next_iter;
335
328
336 Some(data)
329 Some(data)
337 } else {
330 } else {
338 // Reached the bottom of the stack, we’re done
331 // Reached the bottom of the stack, we’re done
339 None
332 None
340 }
333 }
341 })
334 })
342 }
335 }
343 }
336 }
344
337
345 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
338 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
346 fn clear(&mut self) {
339 fn clear(&mut self) {
347 self.root.clear();
340 self.root.clear();
348 self.nodes_with_entry_count = 0;
341 self.nodes_with_entry_count = 0;
349 self.nodes_with_copy_source_count = 0;
342 self.nodes_with_copy_source_count = 0;
350 }
343 }
351
344
352 fn add_file(
345 fn add_file(
353 &mut self,
346 &mut self,
354 filename: &HgPath,
347 filename: &HgPath,
355 old_state: EntryState,
348 old_state: EntryState,
356 entry: DirstateEntry,
349 entry: DirstateEntry,
357 ) -> Result<(), DirstateMapError> {
350 ) -> Result<(), DirstateMapError> {
358 self.add_or_remove_file(filename, old_state, entry);
351 self.add_or_remove_file(filename, old_state, entry);
359 Ok(())
352 Ok(())
360 }
353 }
361
354
362 fn remove_file(
355 fn remove_file(
363 &mut self,
356 &mut self,
364 filename: &HgPath,
357 filename: &HgPath,
365 old_state: EntryState,
358 old_state: EntryState,
366 size: i32,
359 size: i32,
367 ) -> Result<(), DirstateMapError> {
360 ) -> Result<(), DirstateMapError> {
368 let entry = DirstateEntry {
361 let entry = DirstateEntry {
369 state: EntryState::Removed,
362 state: EntryState::Removed,
370 mode: 0,
363 mode: 0,
371 size,
364 size,
372 mtime: 0,
365 mtime: 0,
373 };
366 };
374 self.add_or_remove_file(filename, old_state, entry);
367 self.add_or_remove_file(filename, old_state, entry);
375 Ok(())
368 Ok(())
376 }
369 }
377
370
378 fn drop_file(
371 fn drop_file(
379 &mut self,
372 &mut self,
380 filename: &HgPath,
373 filename: &HgPath,
381 old_state: EntryState,
374 old_state: EntryState,
382 ) -> Result<bool, DirstateMapError> {
375 ) -> Result<bool, DirstateMapError> {
383 struct Dropped {
376 struct Dropped {
384 was_tracked: bool,
377 was_tracked: bool,
385 had_entry: bool,
378 had_entry: bool,
386 had_copy_source: bool,
379 had_copy_source: bool,
387 }
380 }
388 fn recur(nodes: &mut ChildNodes, path: &HgPath) -> Option<Dropped> {
381 fn recur(nodes: &mut ChildNodes, path: &HgPath) -> Option<Dropped> {
389 let (first_path_component, rest_of_path) =
382 let (first_path_component, rest_of_path) =
390 path.split_first_component();
383 path.split_first_component();
391 let node = nodes.get_mut(first_path_component)?;
384 let node = nodes.get_mut(first_path_component)?;
392 let dropped;
385 let dropped;
393 if let Some(rest) = rest_of_path {
386 if let Some(rest) = rest_of_path {
394 dropped = recur(&mut node.children, rest)?;
387 dropped = recur(&mut node.children, rest)?;
395 if dropped.was_tracked {
388 if dropped.was_tracked {
396 node.tracked_descendants_count -= 1;
389 node.tracked_descendants_count -= 1;
397 }
390 }
398 } else {
391 } else {
399 dropped = Dropped {
392 dropped = Dropped {
400 was_tracked: node
393 was_tracked: node
401 .entry
394 .entry
402 .as_ref()
395 .as_ref()
403 .map_or(false, |entry| entry.state.is_tracked()),
396 .map_or(false, |entry| entry.state.is_tracked()),
404 had_entry: node.entry.take().is_some(),
397 had_entry: node.entry.take().is_some(),
405 had_copy_source: node.copy_source.take().is_some(),
398 had_copy_source: node.copy_source.take().is_some(),
406 };
399 };
407 }
400 }
408 // After recursion, for both leaf (rest_of_path is None) nodes and
401 // After recursion, for both leaf (rest_of_path is None) nodes and
409 // parent nodes, remove a node if it just became empty.
402 // parent nodes, remove a node if it just became empty.
410 if node.entry.is_none()
403 if node.entry.is_none()
411 && node.copy_source.is_none()
404 && node.copy_source.is_none()
412 && node.children.is_empty()
405 && node.children.is_empty()
413 {
406 {
414 nodes.remove(first_path_component);
407 nodes.remove(first_path_component);
415 }
408 }
416 Some(dropped)
409 Some(dropped)
417 }
410 }
418
411
419 if let Some(dropped) = recur(&mut self.root, filename) {
412 if let Some(dropped) = recur(&mut self.root, filename) {
420 if dropped.had_entry {
413 if dropped.had_entry {
421 self.nodes_with_entry_count -= 1
414 self.nodes_with_entry_count -= 1
422 }
415 }
423 if dropped.had_copy_source {
416 if dropped.had_copy_source {
424 self.nodes_with_copy_source_count -= 1
417 self.nodes_with_copy_source_count -= 1
425 }
418 }
426 Ok(dropped.had_entry)
419 Ok(dropped.had_entry)
427 } else {
420 } else {
428 debug_assert!(!old_state.is_tracked());
421 debug_assert!(!old_state.is_tracked());
429 Ok(false)
422 Ok(false)
430 }
423 }
431 }
424 }
432
425
433 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
426 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
434 for filename in filenames {
427 for filename in filenames {
435 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
428 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
436 if let Some(entry) = node.entry.as_mut() {
429 if let Some(entry) = node.entry.as_mut() {
437 clear_ambiguous_mtime(entry, now);
430 clear_ambiguous_mtime(entry, now);
438 }
431 }
439 }
432 }
440 }
433 }
441 }
434 }
442
435
443 fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool {
436 fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool {
444 self.get_node(key)
437 self.get_node(key)
445 .and_then(|node| node.entry.as_ref())
438 .and_then(|node| node.entry.as_ref())
446 .map_or(false, DirstateEntry::is_non_normal)
439 .map_or(false, DirstateEntry::is_non_normal)
447 }
440 }
448
441
449 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
442 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
450 // Do nothing, this `DirstateMap` does not have a separate "non normal
443 // Do nothing, this `DirstateMap` does not have a separate "non normal
451 // entries" set that need to be kept up to date
444 // entries" set that need to be kept up to date
452 }
445 }
453
446
454 fn non_normal_or_other_parent_paths(
447 fn non_normal_or_other_parent_paths(
455 &mut self,
448 &mut self,
456 ) -> Box<dyn Iterator<Item = &HgPath> + '_> {
449 ) -> Box<dyn Iterator<Item = &HgPath> + '_> {
457 Box::new(self.iter_nodes().filter_map(|(path, node)| {
450 Box::new(self.iter_nodes().filter_map(|(path, node)| {
458 node.entry
451 node.entry
459 .as_ref()
452 .as_ref()
460 .filter(|entry| {
453 .filter(|entry| {
461 entry.is_non_normal() || entry.is_from_other_parent()
454 entry.is_non_normal() || entry.is_from_other_parent()
462 })
455 })
463 .map(|_| path)
456 .map(|_| path)
464 }))
457 }))
465 }
458 }
466
459
467 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
460 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
468 // Do nothing, this `DirstateMap` does not have a separate "non normal
461 // Do nothing, this `DirstateMap` does not have a separate "non normal
469 // entries" and "from other parent" sets that need to be recomputed
462 // entries" and "from other parent" sets that need to be recomputed
470 }
463 }
471
464
472 fn iter_non_normal_paths(
465 fn iter_non_normal_paths(
473 &mut self,
466 &mut self,
474 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
467 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
475 self.iter_non_normal_paths_panic()
468 self.iter_non_normal_paths_panic()
476 }
469 }
477
470
478 fn iter_non_normal_paths_panic(
471 fn iter_non_normal_paths_panic(
479 &self,
472 &self,
480 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
473 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
481 Box::new(self.iter_nodes().filter_map(|(path, node)| {
474 Box::new(self.iter_nodes().filter_map(|(path, node)| {
482 node.entry
475 node.entry
483 .as_ref()
476 .as_ref()
484 .filter(|entry| entry.is_non_normal())
477 .filter(|entry| entry.is_non_normal())
485 .map(|_| path)
478 .map(|_| path)
486 }))
479 }))
487 }
480 }
488
481
489 fn iter_other_parent_paths(
482 fn iter_other_parent_paths(
490 &mut self,
483 &mut self,
491 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
484 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
492 Box::new(self.iter_nodes().filter_map(|(path, node)| {
485 Box::new(self.iter_nodes().filter_map(|(path, node)| {
493 node.entry
486 node.entry
494 .as_ref()
487 .as_ref()
495 .filter(|entry| entry.is_from_other_parent())
488 .filter(|entry| entry.is_from_other_parent())
496 .map(|_| path)
489 .map(|_| path)
497 }))
490 }))
498 }
491 }
499
492
500 fn has_tracked_dir(
493 fn has_tracked_dir(
501 &mut self,
494 &mut self,
502 directory: &HgPath,
495 directory: &HgPath,
503 ) -> Result<bool, DirstateMapError> {
496 ) -> Result<bool, DirstateMapError> {
504 if let Some(node) = self.get_node(directory) {
497 if let Some(node) = self.get_node(directory) {
505 // A node without a `DirstateEntry` was created to hold child
498 // A node without a `DirstateEntry` was created to hold child
506 // nodes, and is therefore a directory.
499 // nodes, and is therefore a directory.
507 Ok(node.entry.is_none() && node.tracked_descendants_count > 0)
500 Ok(node.entry.is_none() && node.tracked_descendants_count > 0)
508 } else {
501 } else {
509 Ok(false)
502 Ok(false)
510 }
503 }
511 }
504 }
512
505
513 fn has_dir(
506 fn has_dir(
514 &mut self,
507 &mut self,
515 directory: &HgPath,
508 directory: &HgPath,
516 ) -> Result<bool, DirstateMapError> {
509 ) -> Result<bool, DirstateMapError> {
517 if let Some(node) = self.get_node(directory) {
510 if let Some(node) = self.get_node(directory) {
518 // A node without a `DirstateEntry` was created to hold child
511 // A node without a `DirstateEntry` was created to hold child
519 // nodes, and is therefore a directory.
512 // nodes, and is therefore a directory.
520 Ok(node.entry.is_none())
513 Ok(node.entry.is_none())
521 } else {
514 } else {
522 Ok(false)
515 Ok(false)
523 }
516 }
524 }
517 }
525
518
526 #[timed]
519 #[timed]
527 fn pack_v1(
520 fn pack_v1(
528 &mut self,
521 &mut self,
529 parents: DirstateParents,
522 parents: DirstateParents,
530 now: Timestamp,
523 now: Timestamp,
531 ) -> Result<Vec<u8>, DirstateError> {
524 ) -> Result<Vec<u8>, DirstateError> {
532 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
525 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
533 // reallocations
526 // reallocations
534 let mut size = parents.as_bytes().len();
527 let mut size = parents.as_bytes().len();
535 for (path, node) in self.iter_nodes() {
528 for (path, node) in self.iter_nodes() {
536 if node.entry.is_some() {
529 if node.entry.is_some() {
537 size += packed_entry_size(
530 size += packed_entry_size(
538 path,
531 path,
539 node.copy_source.as_ref().map(|p| &**p),
532 node.copy_source.as_ref().map(|p| &**p),
540 )
533 )
541 }
534 }
542 }
535 }
543
536
544 let mut packed = Vec::with_capacity(size);
537 let mut packed = Vec::with_capacity(size);
545 packed.extend(parents.as_bytes());
538 packed.extend(parents.as_bytes());
546
539
547 let now: i32 = now.0.try_into().expect("time overflow");
540 let now: i32 = now.0.try_into().expect("time overflow");
548 for (path, opt_entry, copy_source) in self.iter_node_data_mut() {
541 for (path, opt_entry, copy_source) in self.iter_node_data_mut() {
549 if let Some(entry) = opt_entry {
542 if let Some(entry) = opt_entry {
550 clear_ambiguous_mtime(entry, now);
543 clear_ambiguous_mtime(entry, now);
551 pack_entry(
544 pack_entry(
552 path,
545 path,
553 entry,
546 entry,
554 copy_source.as_ref().map(|p| &**p),
547 copy_source.as_ref().map(|p| &**p),
555 &mut packed,
548 &mut packed,
556 );
549 );
557 }
550 }
558 }
551 }
559 Ok(packed)
552 Ok(packed)
560 }
553 }
561
554
562 #[timed]
555 #[timed]
563 fn pack_v2(
556 fn pack_v2(
564 &mut self,
557 &mut self,
565 parents: DirstateParents,
558 parents: DirstateParents,
566 now: Timestamp,
559 now: Timestamp,
567 ) -> Result<Vec<u8>, DirstateError> {
560 ) -> Result<Vec<u8>, DirstateError> {
568 // Inefficient but temporary
561 on_disk::write(self, parents, now)
569 let mut v2 = V2_FORMAT_MARKER.to_vec();
570 v2.append(&mut self.pack_v1(parents, now)?);
571 Ok(v2)
572 }
562 }
573
563
574 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
564 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
575 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
565 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
576 // needs to be recomputed
566 // needs to be recomputed
577 Ok(())
567 Ok(())
578 }
568 }
579
569
580 fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
570 fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
581 // Do nothing, this `DirstateMap` does not a separate `dirs` that needs
571 // Do nothing, this `DirstateMap` does not a separate `dirs` that needs
582 // to be recomputed
572 // to be recomputed
583 Ok(())
573 Ok(())
584 }
574 }
585
575
586 fn status<'a>(
576 fn status<'a>(
587 &'a mut self,
577 &'a mut self,
588 matcher: &'a (dyn Matcher + Sync),
578 matcher: &'a (dyn Matcher + Sync),
589 root_dir: PathBuf,
579 root_dir: PathBuf,
590 ignore_files: Vec<PathBuf>,
580 ignore_files: Vec<PathBuf>,
591 options: StatusOptions,
581 options: StatusOptions,
592 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
582 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
593 {
583 {
594 super::status::status(self, matcher, root_dir, ignore_files, options)
584 super::status::status(self, matcher, root_dir, ignore_files, options)
595 }
585 }
596
586
597 fn copy_map_len(&self) -> usize {
587 fn copy_map_len(&self) -> usize {
598 self.nodes_with_copy_source_count
588 self.nodes_with_copy_source_count as usize
599 }
589 }
600
590
601 fn copy_map_iter(&self) -> CopyMapIter<'_> {
591 fn copy_map_iter(&self) -> CopyMapIter<'_> {
602 Box::new(self.iter_nodes().filter_map(|(path, node)| {
592 Box::new(self.iter_nodes().filter_map(|(path, node)| {
603 node.copy_source
593 node.copy_source
604 .as_ref()
594 .as_ref()
605 .map(|copy_source| (path, &**copy_source))
595 .map(|copy_source| (path, &**copy_source))
606 }))
596 }))
607 }
597 }
608
598
609 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
599 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
610 if let Some(node) = self.get_node(key) {
600 if let Some(node) = self.get_node(key) {
611 node.copy_source.is_some()
601 node.copy_source.is_some()
612 } else {
602 } else {
613 false
603 false
614 }
604 }
615 }
605 }
616
606
617 fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> {
607 fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> {
618 self.get_node(key)?.copy_source.as_ref().map(|p| &**p)
608 self.get_node(key)?.copy_source.as_ref().map(|p| &**p)
619 }
609 }
620
610
621 fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> {
611 fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> {
622 let count = &mut self.nodes_with_copy_source_count;
612 let count = &mut self.nodes_with_copy_source_count;
623 Self::get_node_mut(&mut self.root, key).and_then(|node| {
613 Self::get_node_mut(&mut self.root, key).and_then(|node| {
624 if node.copy_source.is_some() {
614 if node.copy_source.is_some() {
625 *count -= 1
615 *count -= 1
626 }
616 }
627 node.copy_source.take().map(Cow::into_owned)
617 node.copy_source.take().map(Cow::into_owned)
628 })
618 })
629 }
619 }
630
620
631 fn copy_map_insert(
621 fn copy_map_insert(
632 &mut self,
622 &mut self,
633 key: HgPathBuf,
623 key: HgPathBuf,
634 value: HgPathBuf,
624 value: HgPathBuf,
635 ) -> Option<HgPathBuf> {
625 ) -> Option<HgPathBuf> {
636 let node = Self::get_or_insert_node(
626 let node = Self::get_or_insert_node(
637 &mut self.root,
627 &mut self.root,
638 &key,
628 &key,
639 WithBasename::to_cow_owned,
629 WithBasename::to_cow_owned,
640 |_ancestor| {},
630 |_ancestor| {},
641 );
631 );
642 if node.copy_source.is_none() {
632 if node.copy_source.is_none() {
643 self.nodes_with_copy_source_count += 1
633 self.nodes_with_copy_source_count += 1
644 }
634 }
645 node.copy_source.replace(value.into()).map(Cow::into_owned)
635 node.copy_source.replace(value.into()).map(Cow::into_owned)
646 }
636 }
647
637
648 fn len(&self) -> usize {
638 fn len(&self) -> usize {
649 self.nodes_with_entry_count
639 self.nodes_with_entry_count as usize
650 }
640 }
651
641
652 fn contains_key(&self, key: &HgPath) -> bool {
642 fn contains_key(&self, key: &HgPath) -> bool {
653 self.get(key).is_some()
643 self.get(key).is_some()
654 }
644 }
655
645
656 fn get(&self, key: &HgPath) -> Option<&DirstateEntry> {
646 fn get(&self, key: &HgPath) -> Option<&DirstateEntry> {
657 self.get_node(key)?.entry.as_ref()
647 self.get_node(key)?.entry.as_ref()
658 }
648 }
659
649
660 fn iter(&self) -> StateMapIter<'_> {
650 fn iter(&self) -> StateMapIter<'_> {
661 Box::new(self.iter_nodes().filter_map(|(path, node)| {
651 Box::new(self.iter_nodes().filter_map(|(path, node)| {
662 node.entry.as_ref().map(|entry| (path, entry))
652 node.entry.as_ref().map(|entry| (path, entry))
663 }))
653 }))
664 }
654 }
665 }
655 }
@@ -1,4 +1,335 b''
1 //! The "version 2" disk representation of the dirstate
2 //!
3 //! # File format
4 //!
5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 //! the nodes representing the files and directories at the root of the
8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 //! nodes (if any) for nested files and directories.
11
12 use crate::dirstate::parsers::clear_ambiguous_mtime;
13 use crate::dirstate::parsers::Timestamp;
14 use crate::dirstate_tree::dirstate_map::{self, DirstateMap};
15 use crate::dirstate_tree::path_with_basename::WithBasename;
16 use crate::errors::HgError;
17 use crate::utils::hg_path::HgPath;
18 use crate::DirstateEntry;
19 use crate::DirstateError;
20 use crate::DirstateParents;
21 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
22 use bytes_cast::BytesCast;
23 use std::borrow::Cow;
24 use std::convert::{TryFrom, TryInto};
25
1 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
26 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
2 /// Acts like a "magic number". This is a sanity check, not strictly necessary
27 /// This a redundant sanity check more than an actual "magic number" since
3 /// since `.hg/requires` already governs which format should be used.
28 /// `.hg/requires` already governs which format should be used.
4 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
29 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
30
31 #[derive(BytesCast)]
32 #[repr(C)]
33 struct Header {
34 marker: [u8; V2_FORMAT_MARKER.len()],
35
36 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
37 /// `parents` field being at this offset, immediately after `marker`.
38 parents: DirstateParents,
39
40 root: ChildNodes,
41 nodes_with_entry_count: Size,
42 nodes_with_copy_source_count: Size,
43 }
44
45 #[derive(BytesCast)]
46 #[repr(C)]
47 struct Node {
48 full_path: PathSlice,
49
50 /// In bytes from `self.full_path.start`
51 base_name_start: Size,
52
53 copy_source: OptPathSlice,
54 entry: OptEntry,
55 children: ChildNodes,
56 tracked_descendants_count: Size,
57 }
58
59 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
60 /// format
61 #[derive(BytesCast)]
62 #[repr(C)]
63 struct OptEntry {
64 state: u8,
65 mode: I32Be,
66 mtime: I32Be,
67 size: I32Be,
68 }
69
70 /// Counted in bytes from the start of the file
71 ///
72 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
73 /// we could save space by using `U32Be` instead.
74 type Offset = U64Be;
75
76 /// Counted in number of items
77 ///
78 /// NOTE: not supporting directories with more than 4 billion direct children,
79 /// or filenames more than 4 GiB.
80 type Size = U32Be;
81
82 /// Location of consecutive, fixed-size items.
83 ///
84 /// An item can be a single byte for paths, or a struct with
85 /// `derive(BytesCast)`.
86 #[derive(BytesCast, Copy, Clone)]
87 #[repr(C)]
88 struct Slice {
89 start: Offset,
90 len: Size,
91 }
92
93 /// A contiguous sequence of `len` times `Node`, representing the child nodes
94 /// of either some other node or of the repository root.
95 ///
96 /// Always sorted by ascending `full_path`, to allow binary search.
97 /// Since nodes with the same parent nodes also have the same parent path,
98 /// only the `base_name`s need to be compared during binary search.
99 type ChildNodes = Slice;
100
101 /// A `HgPath` of `len` bytes
102 type PathSlice = Slice;
103
104 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
105 type OptPathSlice = Slice;
106
107 /// Make sure that size-affecting changes are made knowingly
108 fn _static_assert_size_of() {
109 let _ = std::mem::transmute::<Header, [u8; 72]>;
110 let _ = std::mem::transmute::<Node, [u8; 57]>;
111 }
112
113 pub(super) fn read<'on_disk>(
114 on_disk: &'on_disk [u8],
115 ) -> Result<(DirstateMap<'on_disk>, Option<DirstateParents>), DirstateError> {
116 if on_disk.is_empty() {
117 return Ok((DirstateMap::empty(on_disk), None));
118 }
119 let (header, _) = Header::from_bytes(on_disk)
120 .map_err(|_| HgError::corrupted("truncated dirstate-v2"))?;
121 let Header {
122 marker,
123 parents,
124 root,
125 nodes_with_entry_count,
126 nodes_with_copy_source_count,
127 } = header;
128 if marker != V2_FORMAT_MARKER {
129 return Err(HgError::corrupted("missing dirstated-v2 marker").into());
130 }
131 let dirstate_map = DirstateMap {
132 on_disk,
133 root: read_nodes(on_disk, *root)?,
134 nodes_with_entry_count: nodes_with_entry_count.get(),
135 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
136 };
137 let parents = Some(parents.clone());
138 Ok((dirstate_map, parents))
139 }
140
141 impl Node {
142 pub(super) fn path<'on_disk>(
143 &self,
144 on_disk: &'on_disk [u8],
145 ) -> Result<dirstate_map::NodeKey<'on_disk>, HgError> {
146 let full_path = read_hg_path(on_disk, self.full_path)?;
147 let base_name_start = usize::try_from(self.base_name_start.get())
148 // u32 -> usize, could only panic on a 16-bit CPU
149 .expect("dirstate-v2 base_name_start out of bounds");
150 if base_name_start < full_path.len() {
151 Ok(WithBasename::from_raw_parts(full_path, base_name_start))
152 } else {
153 Err(HgError::corrupted(
154 "dirstate-v2 base_name_start out of bounds",
155 ))
156 }
157 }
158
159 pub(super) fn copy_source<'on_disk>(
160 &self,
161 on_disk: &'on_disk [u8],
162 ) -> Result<Option<Cow<'on_disk, HgPath>>, HgError> {
163 Ok(if self.copy_source.start.get() != 0 {
164 Some(read_hg_path(on_disk, self.copy_source)?)
165 } else {
166 None
167 })
168 }
169
170 pub(super) fn entry(&self) -> Result<Option<DirstateEntry>, HgError> {
171 Ok(if self.entry.state != b'\0' {
172 Some(DirstateEntry {
173 state: self.entry.state.try_into()?,
174 mode: self.entry.mode.get(),
175 mtime: self.entry.mtime.get(),
176 size: self.entry.size.get(),
177 })
178 } else {
179 None
180 })
181 }
182
183 pub(super) fn to_in_memory_node<'on_disk>(
184 &self,
185 on_disk: &'on_disk [u8],
186 ) -> Result<dirstate_map::Node<'on_disk>, HgError> {
187 Ok(dirstate_map::Node {
188 children: read_nodes(on_disk, self.children)?,
189 copy_source: self.copy_source(on_disk)?,
190 entry: self.entry()?,
191 tracked_descendants_count: self.tracked_descendants_count.get(),
192 })
193 }
194 }
195
196 fn read_nodes(
197 on_disk: &[u8],
198 slice: ChildNodes,
199 ) -> Result<dirstate_map::ChildNodes, HgError> {
200 read_slice::<Node>(on_disk, slice)?
201 .iter()
202 .map(|node| {
203 Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?))
204 })
205 .collect()
206 }
207
208 fn read_hg_path(on_disk: &[u8], slice: Slice) -> Result<Cow<HgPath>, HgError> {
209 let bytes = read_slice::<u8>(on_disk, slice)?;
210 Ok(Cow::Borrowed(HgPath::new(bytes)))
211 }
212
213 fn read_slice<T>(on_disk: &[u8], slice: Slice) -> Result<&[T], HgError>
214 where
215 T: BytesCast,
216 {
217 // Either `usize::MAX` would result in "out of bounds" error since a single
218 // `&[u8]` cannot occupy the entire addess space.
219 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
220 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
221 on_disk
222 .get(start..)
223 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
224 .map(|(slice, _rest)| slice)
225 .ok_or_else(|| {
226 HgError::corrupted("dirstate v2 slice is out of bounds")
227 })
228 }
229
230 pub(super) fn write(
231 dirstate_map: &mut DirstateMap,
232 parents: DirstateParents,
233 now: Timestamp,
234 ) -> Result<Vec<u8>, DirstateError> {
235 // TODO:Β how do we want to handle this in 2038?
236 let now: i32 = now.0.try_into().expect("time overflow");
237
238 let header_len = std::mem::size_of::<Header>();
239
240 // This ignores the space for paths, and for nodes without an entry.
241 // TODO: better estimate? Skip the `Vec` and write to a file directly?
242 let size_guess = header_len
243 + std::mem::size_of::<Node>()
244 * dirstate_map.nodes_with_entry_count as usize;
245 let mut out = Vec::with_capacity(size_guess);
246
247 // Keep space for the header. We’ll fill it out at the end when we know the
248 // actual offset for the root nodes.
249 out.resize(header_len, 0_u8);
250
251 let root = write_nodes(&mut dirstate_map.root, now, &mut out)?;
252
253 let header = Header {
254 marker: *V2_FORMAT_MARKER,
255 parents: parents,
256 root,
257 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
258 nodes_with_copy_source_count: dirstate_map
259 .nodes_with_copy_source_count
260 .into(),
261 };
262 out[..header_len].copy_from_slice(header.as_bytes());
263 Ok(out)
264 }
265
266 /// Serialize the dirstate to the `v2` format after clearing ambigous `mtime`s.
267 fn write_nodes(
268 nodes: &mut dirstate_map::ChildNodes,
269 now: i32,
270 out: &mut Vec<u8>,
271 ) -> Result<ChildNodes, DirstateError> {
272 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
273 // order. Sort to enable binary search in the written file.
274 let nodes = dirstate_map::Node::sorted(nodes);
275
276 // First accumulate serialized nodes in a `Vec`
277 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
278 for (full_path, node) in nodes {
279 on_disk_nodes.push(Node {
280 children: write_nodes(&mut node.children, now, out)?,
281 tracked_descendants_count: node.tracked_descendants_count.into(),
282 full_path: write_slice::<u8>(
283 full_path.full_path().as_bytes(),
284 out,
285 ),
286 base_name_start: u32::try_from(full_path.base_name_start())
287 // Could only panic for paths over 4 GiB
288 .expect("dirstate-v2 offset overflow")
289 .into(),
290 copy_source: if let Some(source) = &node.copy_source {
291 write_slice::<u8>(source.as_bytes(), out)
292 } else {
293 Slice {
294 start: 0.into(),
295 len: 0.into(),
296 }
297 },
298 entry: if let Some(entry) = &mut node.entry {
299 clear_ambiguous_mtime(entry, now);
300 OptEntry {
301 state: entry.state.into(),
302 mode: entry.mode.into(),
303 mtime: entry.mtime.into(),
304 size: entry.size.into(),
305 }
306 } else {
307 OptEntry {
308 state: b'\0',
309 mode: 0.into(),
310 mtime: 0.into(),
311 size: 0.into(),
312 }
313 },
314 })
315 }
316 // … so we can write them contiguously
317 Ok(write_slice::<Node>(&on_disk_nodes, out))
318 }
319
320 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
321 where
322 T: BytesCast,
323 {
324 let start = u64::try_from(out.len())
325 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
326 .expect("dirstate-v2 offset overflow")
327 .into();
328 let len = u32::try_from(slice.len())
329 // Could only panic for paths over 4 GiB or nodes with over 4 billions
330 // child nodes
331 .expect("dirstate-v2 offset overflow")
332 .into();
333 out.extend(slice.as_bytes());
334 Slice { start, len }
335 }
@@ -1,172 +1,187 b''
1 use crate::utils::hg_path::HgPath;
1 use crate::utils::hg_path::HgPath;
2 use std::borrow::{Borrow, Cow};
2 use std::borrow::{Borrow, Cow};
3
3
4 /// Wraps `HgPath` or `HgPathBuf` to make it behave "as" its last path
4 /// Wraps `HgPath` or `HgPathBuf` to make it behave "as" its last path
5 /// component, a.k.a. its base name (as in Python’s `os.path.basename`), but
5 /// component, a.k.a. its base name (as in Python’s `os.path.basename`), but
6 /// also allow recovering the full path.
6 /// also allow recovering the full path.
7 ///
7 ///
8 /// "Behaving as" means that equality and comparison consider only the base
8 /// "Behaving as" means that equality and comparison consider only the base
9 /// name, and `std::borrow::Borrow` is implemented to return only the base
9 /// name, and `std::borrow::Borrow` is implemented to return only the base
10 /// name. This allows using the base name as a map key while still being able
10 /// name. This allows using the base name as a map key while still being able
11 /// to recover the full path, in a single memory allocation.
11 /// to recover the full path, in a single memory allocation.
12 #[derive(Debug)]
12 #[derive(Debug)]
13 pub struct WithBasename<T> {
13 pub struct WithBasename<T> {
14 full_path: T,
14 full_path: T,
15
15
16 /// The position after the last slash separator in `full_path`, or `0`
16 /// The position after the last slash separator in `full_path`, or `0`
17 /// if there is no slash.
17 /// if there is no slash.
18 base_name_start: usize,
18 base_name_start: usize,
19 }
19 }
20
20
21 impl<T> WithBasename<T> {
21 impl<T> WithBasename<T> {
22 pub fn full_path(&self) -> &T {
22 pub fn full_path(&self) -> &T {
23 &self.full_path
23 &self.full_path
24 }
24 }
25 }
25 }
26
26
27 fn find_base_name_start(full_path: &HgPath) -> usize {
28 if let Some(last_slash_position) =
29 full_path.as_bytes().iter().rposition(|&byte| byte == b'/')
30 {
31 last_slash_position + 1
32 } else {
33 0
34 }
35 }
36
27 impl<T: AsRef<HgPath>> WithBasename<T> {
37 impl<T: AsRef<HgPath>> WithBasename<T> {
28 pub fn new(full_path: T) -> Self {
38 pub fn new(full_path: T) -> Self {
29 let base_name_start = if let Some(last_slash_position) = full_path
39 Self {
30 .as_ref()
40 base_name_start: find_base_name_start(full_path.as_ref()),
31 .as_bytes()
41 full_path,
32 .iter()
42 }
33 .rposition(|&byte| byte == b'/')
43 }
34 {
44
35 last_slash_position + 1
45 pub fn from_raw_parts(full_path: T, base_name_start: usize) -> Self {
36 } else {
46 debug_assert_eq!(
37 0
47 base_name_start,
38 };
48 find_base_name_start(full_path.as_ref())
49 );
39 Self {
50 Self {
40 base_name_start,
51 base_name_start,
41 full_path,
52 full_path,
42 }
53 }
43 }
54 }
44
55
45 pub fn base_name(&self) -> &HgPath {
56 pub fn base_name(&self) -> &HgPath {
46 HgPath::new(
57 HgPath::new(
47 &self.full_path.as_ref().as_bytes()[self.base_name_start..],
58 &self.full_path.as_ref().as_bytes()[self.base_name_start..],
48 )
59 )
49 }
60 }
61
62 pub fn base_name_start(&self) -> usize {
63 self.base_name_start
64 }
50 }
65 }
51
66
52 impl<T: AsRef<HgPath>> Borrow<HgPath> for WithBasename<T> {
67 impl<T: AsRef<HgPath>> Borrow<HgPath> for WithBasename<T> {
53 fn borrow(&self) -> &HgPath {
68 fn borrow(&self) -> &HgPath {
54 self.base_name()
69 self.base_name()
55 }
70 }
56 }
71 }
57
72
58 impl<T: AsRef<HgPath>> std::hash::Hash for WithBasename<T> {
73 impl<T: AsRef<HgPath>> std::hash::Hash for WithBasename<T> {
59 fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) {
74 fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) {
60 self.base_name().hash(hasher)
75 self.base_name().hash(hasher)
61 }
76 }
62 }
77 }
63
78
64 impl<T: AsRef<HgPath> + PartialEq> PartialEq for WithBasename<T> {
79 impl<T: AsRef<HgPath> + PartialEq> PartialEq for WithBasename<T> {
65 fn eq(&self, other: &Self) -> bool {
80 fn eq(&self, other: &Self) -> bool {
66 self.base_name() == other.base_name()
81 self.base_name() == other.base_name()
67 }
82 }
68 }
83 }
69
84
70 impl<T: AsRef<HgPath> + Eq> Eq for WithBasename<T> {}
85 impl<T: AsRef<HgPath> + Eq> Eq for WithBasename<T> {}
71
86
72 impl<T: AsRef<HgPath> + PartialOrd> PartialOrd for WithBasename<T> {
87 impl<T: AsRef<HgPath> + PartialOrd> PartialOrd for WithBasename<T> {
73 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
88 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
74 self.base_name().partial_cmp(other.base_name())
89 self.base_name().partial_cmp(other.base_name())
75 }
90 }
76 }
91 }
77
92
78 impl<T: AsRef<HgPath> + Ord> Ord for WithBasename<T> {
93 impl<T: AsRef<HgPath> + Ord> Ord for WithBasename<T> {
79 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
94 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
80 self.base_name().cmp(other.base_name())
95 self.base_name().cmp(other.base_name())
81 }
96 }
82 }
97 }
83
98
84 impl<'a> WithBasename<&'a HgPath> {
99 impl<'a> WithBasename<&'a HgPath> {
85 pub fn to_cow_borrowed(self) -> WithBasename<Cow<'a, HgPath>> {
100 pub fn to_cow_borrowed(self) -> WithBasename<Cow<'a, HgPath>> {
86 WithBasename {
101 WithBasename {
87 full_path: Cow::Borrowed(self.full_path),
102 full_path: Cow::Borrowed(self.full_path),
88 base_name_start: self.base_name_start,
103 base_name_start: self.base_name_start,
89 }
104 }
90 }
105 }
91
106
92 pub fn to_cow_owned<'b>(self) -> WithBasename<Cow<'b, HgPath>> {
107 pub fn to_cow_owned<'b>(self) -> WithBasename<Cow<'b, HgPath>> {
93 WithBasename {
108 WithBasename {
94 full_path: Cow::Owned(self.full_path.to_owned()),
109 full_path: Cow::Owned(self.full_path.to_owned()),
95 base_name_start: self.base_name_start,
110 base_name_start: self.base_name_start,
96 }
111 }
97 }
112 }
98 }
113 }
99
114
100 impl<'a> WithBasename<&'a HgPath> {
115 impl<'a> WithBasename<&'a HgPath> {
101 /// Returns an iterator of `WithBasename<&HgPath>` for the ancestor
116 /// Returns an iterator of `WithBasename<&HgPath>` for the ancestor
102 /// directory paths of the given `path`, as well as `path` itself.
117 /// directory paths of the given `path`, as well as `path` itself.
103 ///
118 ///
104 /// For example, the full paths of inclusive ancestors of "a/b/c" are "a",
119 /// For example, the full paths of inclusive ancestors of "a/b/c" are "a",
105 /// "a/b", and "a/b/c" in that order.
120 /// "a/b", and "a/b/c" in that order.
106 pub fn inclusive_ancestors_of(
121 pub fn inclusive_ancestors_of(
107 path: &'a HgPath,
122 path: &'a HgPath,
108 ) -> impl Iterator<Item = WithBasename<&'a HgPath>> {
123 ) -> impl Iterator<Item = WithBasename<&'a HgPath>> {
109 let mut slash_positions =
124 let mut slash_positions =
110 path.as_bytes().iter().enumerate().filter_map(|(i, &byte)| {
125 path.as_bytes().iter().enumerate().filter_map(|(i, &byte)| {
111 if byte == b'/' {
126 if byte == b'/' {
112 Some(i)
127 Some(i)
113 } else {
128 } else {
114 None
129 None
115 }
130 }
116 });
131 });
117 let mut opt_next_component_start = Some(0);
132 let mut opt_next_component_start = Some(0);
118 std::iter::from_fn(move || {
133 std::iter::from_fn(move || {
119 opt_next_component_start.take().map(|next_component_start| {
134 opt_next_component_start.take().map(|next_component_start| {
120 if let Some(slash_pos) = slash_positions.next() {
135 if let Some(slash_pos) = slash_positions.next() {
121 opt_next_component_start = Some(slash_pos + 1);
136 opt_next_component_start = Some(slash_pos + 1);
122 Self {
137 Self {
123 full_path: HgPath::new(&path.as_bytes()[..slash_pos]),
138 full_path: HgPath::new(&path.as_bytes()[..slash_pos]),
124 base_name_start: next_component_start,
139 base_name_start: next_component_start,
125 }
140 }
126 } else {
141 } else {
127 // Not setting `opt_next_component_start` here: there will
142 // Not setting `opt_next_component_start` here: there will
128 // be no iteration after this one because `.take()` set it
143 // be no iteration after this one because `.take()` set it
129 // to `None`.
144 // to `None`.
130 Self {
145 Self {
131 full_path: path,
146 full_path: path,
132 base_name_start: next_component_start,
147 base_name_start: next_component_start,
133 }
148 }
134 }
149 }
135 })
150 })
136 })
151 })
137 }
152 }
138 }
153 }
139
154
140 #[test]
155 #[test]
141 fn test() {
156 fn test() {
142 let a = WithBasename::new(HgPath::new("a").to_owned());
157 let a = WithBasename::new(HgPath::new("a").to_owned());
143 assert_eq!(&**a.full_path(), HgPath::new(b"a"));
158 assert_eq!(&**a.full_path(), HgPath::new(b"a"));
144 assert_eq!(a.base_name(), HgPath::new(b"a"));
159 assert_eq!(a.base_name(), HgPath::new(b"a"));
145
160
146 let cba = WithBasename::new(HgPath::new("c/b/a").to_owned());
161 let cba = WithBasename::new(HgPath::new("c/b/a").to_owned());
147 assert_eq!(&**cba.full_path(), HgPath::new(b"c/b/a"));
162 assert_eq!(&**cba.full_path(), HgPath::new(b"c/b/a"));
148 assert_eq!(cba.base_name(), HgPath::new(b"a"));
163 assert_eq!(cba.base_name(), HgPath::new(b"a"));
149
164
150 assert_eq!(a, cba);
165 assert_eq!(a, cba);
151 let borrowed: &HgPath = cba.borrow();
166 let borrowed: &HgPath = cba.borrow();
152 assert_eq!(borrowed, HgPath::new("a"));
167 assert_eq!(borrowed, HgPath::new("a"));
153 }
168 }
154
169
155 #[test]
170 #[test]
156 fn test_inclusive_ancestors() {
171 fn test_inclusive_ancestors() {
157 let mut iter = WithBasename::inclusive_ancestors_of(HgPath::new("a/bb/c"));
172 let mut iter = WithBasename::inclusive_ancestors_of(HgPath::new("a/bb/c"));
158
173
159 let next = iter.next().unwrap();
174 let next = iter.next().unwrap();
160 assert_eq!(*next.full_path(), HgPath::new("a"));
175 assert_eq!(*next.full_path(), HgPath::new("a"));
161 assert_eq!(next.base_name(), HgPath::new("a"));
176 assert_eq!(next.base_name(), HgPath::new("a"));
162
177
163 let next = iter.next().unwrap();
178 let next = iter.next().unwrap();
164 assert_eq!(*next.full_path(), HgPath::new("a/bb"));
179 assert_eq!(*next.full_path(), HgPath::new("a/bb"));
165 assert_eq!(next.base_name(), HgPath::new("bb"));
180 assert_eq!(next.base_name(), HgPath::new("bb"));
166
181
167 let next = iter.next().unwrap();
182 let next = iter.next().unwrap();
168 assert_eq!(*next.full_path(), HgPath::new("a/bb/c"));
183 assert_eq!(*next.full_path(), HgPath::new("a/bb/c"));
169 assert_eq!(next.base_name(), HgPath::new("c"));
184 assert_eq!(next.base_name(), HgPath::new("c"));
170
185
171 assert!(iter.next().is_none());
186 assert!(iter.next().is_none());
172 }
187 }
General Comments 0
You need to be logged in to leave comments. Login now