##// END OF EJS Templates
dirstate-tree: Add `NodeRef` and `ChildNodesRef` enums...
Simon Sapin -
r48124:69530e5d default
parent child Browse files
Show More
@@ -1,639 +1,773 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::convert::TryInto;
5 5 use std::path::PathBuf;
6 6
7 7 use super::on_disk;
8 8 use super::path_with_basename::WithBasename;
9 9 use crate::dirstate::parsers::pack_entry;
10 10 use crate::dirstate::parsers::packed_entry_size;
11 11 use crate::dirstate::parsers::parse_dirstate_entries;
12 12 use crate::dirstate::parsers::Timestamp;
13 13 use crate::matchers::Matcher;
14 14 use crate::utils::hg_path::{HgPath, HgPathBuf};
15 15 use crate::CopyMapIter;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateMapError;
19 19 use crate::DirstateParents;
20 20 use crate::DirstateStatus;
21 21 use crate::EntryState;
22 22 use crate::FastHashMap;
23 23 use crate::PatternFileWarning;
24 24 use crate::StateMapIter;
25 25 use crate::StatusError;
26 26 use crate::StatusOptions;
27 27
28 28 pub struct DirstateMap<'on_disk> {
29 29 /// Contents of the `.hg/dirstate` file
30 30 pub(super) on_disk: &'on_disk [u8],
31 31
32 32 pub(super) root: ChildNodes<'on_disk>,
33 33
34 34 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
35 35 pub(super) nodes_with_entry_count: u32,
36 36
37 37 /// Number of nodes anywhere in the tree that have
38 38 /// `.copy_source.is_some()`.
39 39 pub(super) nodes_with_copy_source_count: u32,
40 40 }
41 41
42 42 /// Using a plain `HgPathBuf` of the full path from the repository root as a
43 43 /// map key would also work: all paths in a given map have the same parent
44 44 /// path, so comparing full paths gives the same result as comparing base
45 45 /// names. However `HashMap` would waste time always re-hashing the same
46 46 /// string prefix.
47 47 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
48 pub(super) type ChildNodes<'on_disk> =
49 FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>;
48
49 pub(super) enum ChildNodes<'on_disk> {
50 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
51 }
52
53 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
54 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
55 }
56
57 pub(super) enum NodeRef<'tree, 'on_disk> {
58 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
59 }
60
61 impl Default for ChildNodes<'_> {
62 fn default() -> Self {
63 ChildNodes::InMemory(Default::default())
64 }
65 }
66
67 impl<'on_disk> ChildNodes<'on_disk> {
68 pub(super) fn as_ref<'tree>(
69 &'tree self,
70 ) -> ChildNodesRef<'tree, 'on_disk> {
71 match self {
72 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
73 }
74 }
75
76 pub(super) fn is_empty(&self) -> bool {
77 match self {
78 ChildNodes::InMemory(nodes) => nodes.is_empty(),
79 }
80 }
81
82 pub(super) fn make_mut(
83 &mut self,
84 ) -> &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>> {
85 match self {
86 ChildNodes::InMemory(nodes) => nodes,
87 }
88 }
89 }
90
91 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
92 pub(super) fn get(
93 &self,
94 base_name: &HgPath,
95 ) -> Option<NodeRef<'tree, 'on_disk>> {
96 match self {
97 ChildNodesRef::InMemory(nodes) => nodes
98 .get_key_value(base_name)
99 .map(|(k, v)| NodeRef::InMemory(k, v)),
100 }
101 }
102
103 /// Iterate in undefined order
104 pub(super) fn iter(
105 &self,
106 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
107 match self {
108 ChildNodesRef::InMemory(nodes) => {
109 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v))
110 }
111 }
112 }
113
114 /// Iterate in parallel in undefined order
115 pub(super) fn par_iter(
116 &self,
117 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
118 {
119 use rayon::prelude::*;
120 match self {
121 ChildNodesRef::InMemory(nodes) => {
122 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v))
123 }
124 }
125 }
126
127 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
128 match self {
129 ChildNodesRef::InMemory(nodes) => {
130 let mut vec: Vec<_> = nodes
131 .iter()
132 .map(|(k, v)| NodeRef::InMemory(k, v))
133 .collect();
134 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
135 // value: https://github.com/rust-lang/rust/issues/34162
136 vec.sort_unstable_by(|a, b| a.base_name().cmp(b.base_name()));
137 vec
138 }
139 }
140 }
141 }
142
143 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
144 pub(super) fn full_path(&self) -> &'tree HgPath {
145 match self {
146 NodeRef::InMemory(path, _node) => path.full_path(),
147 }
148 }
149
150 /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree
151 pub(super) fn full_path_cow(&self) -> Cow<'on_disk, HgPath> {
152 match self {
153 NodeRef::InMemory(path, _node) => path.full_path().clone(),
154 }
155 }
156
157 pub(super) fn base_name(&self) -> &'tree HgPath {
158 match self {
159 NodeRef::InMemory(path, _node) => path.base_name(),
160 }
161 }
162
163 pub(super) fn children(&self) -> ChildNodesRef<'tree, 'on_disk> {
164 match self {
165 NodeRef::InMemory(_path, node) => node.children.as_ref(),
166 }
167 }
168
169 pub(super) fn copy_source(&self) -> Option<&'tree HgPath> {
170 match self {
171 NodeRef::InMemory(_path, node) => {
172 node.copy_source.as_ref().map(|s| &**s)
173 }
174 }
175 }
176
177 pub(super) fn has_entry(&self) -> bool {
178 match self {
179 NodeRef::InMemory(_path, node) => node.entry.is_some(),
180 }
181 }
182
183 pub(super) fn entry(&self) -> Option<DirstateEntry> {
184 match self {
185 NodeRef::InMemory(_path, node) => node.entry,
186 }
187 }
188 pub(super) fn state(&self) -> Option<EntryState> {
189 match self {
190 NodeRef::InMemory(_path, node) => {
191 node.entry.as_ref().map(|entry| entry.state)
192 }
193 }
194 }
195
196 pub(super) fn tracked_descendants_count(&self) -> u32 {
197 match self {
198 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
199 }
200 }
201 }
50 202
51 203 /// Represents a file or a directory
52 204 #[derive(Default)]
53 205 pub(super) struct Node<'on_disk> {
54 206 /// `None` for directories
55 207 pub(super) entry: Option<DirstateEntry>,
56 208
57 209 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
58 210
59 211 pub(super) children: ChildNodes<'on_disk>,
60 212
61 213 /// How many (non-inclusive) descendants of this node are tracked files
62 214 pub(super) tracked_descendants_count: u32,
63 215 }
64 216
65 impl<'on_disk> Node<'on_disk> {
66 pub(super) fn state(&self) -> Option<EntryState> {
67 self.entry.as_ref().map(|entry| entry.state)
68 }
69
70 pub(super) fn sorted<'tree>(
71 nodes: &'tree ChildNodes<'on_disk>,
72 ) -> Vec<(&'tree NodeKey<'on_disk>, &'tree Self)> {
73 let mut vec: Vec<_> = nodes.iter().collect();
74 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
75 // https://github.com/rust-lang/rust/issues/34162
76 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
77 vec
78 }
79 }
80
81 217 impl<'on_disk> DirstateMap<'on_disk> {
82 218 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
83 219 Self {
84 220 on_disk,
85 221 root: ChildNodes::default(),
86 222 nodes_with_entry_count: 0,
87 223 nodes_with_copy_source_count: 0,
88 224 }
89 225 }
90 226
91 227 #[timed]
92 228 pub fn new_v2(
93 229 on_disk: &'on_disk [u8],
94 230 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
95 231 on_disk::read(on_disk)
96 232 }
97 233
98 234 #[timed]
99 235 pub fn new_v1(
100 236 on_disk: &'on_disk [u8],
101 237 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
102 238 let mut map = Self::empty(on_disk);
103 239 if map.on_disk.is_empty() {
104 240 return Ok((map, None));
105 241 }
106 242
107 243 let parents = parse_dirstate_entries(
108 244 map.on_disk,
109 245 |path, entry, copy_source| {
110 246 let tracked = entry.state.is_tracked();
111 247 let node = Self::get_or_insert_node(
112 248 &mut map.root,
113 249 path,
114 250 WithBasename::to_cow_borrowed,
115 251 |ancestor| {
116 252 if tracked {
117 253 ancestor.tracked_descendants_count += 1
118 254 }
119 255 },
120 256 );
121 257 assert!(
122 258 node.entry.is_none(),
123 259 "duplicate dirstate entry in read"
124 260 );
125 261 assert!(
126 262 node.copy_source.is_none(),
127 263 "duplicate dirstate entry in read"
128 264 );
129 265 node.entry = Some(*entry);
130 266 node.copy_source = copy_source.map(Cow::Borrowed);
131 267 map.nodes_with_entry_count += 1;
132 268 if copy_source.is_some() {
133 269 map.nodes_with_copy_source_count += 1
134 270 }
135 271 },
136 272 )?;
137 273 let parents = Some(parents.clone());
138 274
139 275 Ok((map, parents))
140 276 }
141 277
142 fn get_node(&self, path: &HgPath) -> Option<&Node> {
143 let mut children = &self.root;
278 fn get_node<'tree>(
279 &'tree self,
280 path: &HgPath,
281 ) -> Option<NodeRef<'tree, 'on_disk>> {
282 let mut children = self.root.as_ref();
144 283 let mut components = path.components();
145 284 let mut component =
146 285 components.next().expect("expected at least one components");
147 286 loop {
148 287 let child = children.get(component)?;
149 288 if let Some(next_component) = components.next() {
150 289 component = next_component;
151 children = &child.children;
290 children = child.children();
152 291 } else {
153 292 return Some(child);
154 293 }
155 294 }
156 295 }
157 296
158 297 /// Returns a mutable reference to the node at `path` if it exists
159 298 ///
160 299 /// This takes `root` instead of `&mut self` so that callers can mutate
161 300 /// other fields while the returned borrow is still valid
162 301 fn get_node_mut<'tree>(
163 302 root: &'tree mut ChildNodes<'on_disk>,
164 303 path: &HgPath,
165 304 ) -> Option<&'tree mut Node<'on_disk>> {
166 305 let mut children = root;
167 306 let mut components = path.components();
168 307 let mut component =
169 308 components.next().expect("expected at least one components");
170 309 loop {
171 let child = children.get_mut(component)?;
310 let child = children.make_mut().get_mut(component)?;
172 311 if let Some(next_component) = components.next() {
173 312 component = next_component;
174 313 children = &mut child.children;
175 314 } else {
176 315 return Some(child);
177 316 }
178 317 }
179 318 }
180 319
181 320 fn get_or_insert_node<'tree, 'path>(
182 321 root: &'tree mut ChildNodes<'on_disk>,
183 322 path: &'path HgPath,
184 323 to_cow: impl Fn(
185 324 WithBasename<&'path HgPath>,
186 325 ) -> WithBasename<Cow<'on_disk, HgPath>>,
187 326 mut each_ancestor: impl FnMut(&mut Node),
188 327 ) -> &'tree mut Node<'on_disk> {
189 328 let mut child_nodes = root;
190 329 let mut inclusive_ancestor_paths =
191 330 WithBasename::inclusive_ancestors_of(path);
192 331 let mut ancestor_path = inclusive_ancestor_paths
193 332 .next()
194 333 .expect("expected at least one inclusive ancestor");
195 334 loop {
196 335 // TODO: can we avoid allocating an owned key in cases where the
197 336 // map already contains that key, without introducing double
198 337 // lookup?
199 let child_node =
200 child_nodes.entry(to_cow(ancestor_path)).or_default();
338 let child_node = child_nodes
339 .make_mut()
340 .entry(to_cow(ancestor_path))
341 .or_default();
201 342 if let Some(next) = inclusive_ancestor_paths.next() {
202 343 each_ancestor(child_node);
203 344 ancestor_path = next;
204 345 child_nodes = &mut child_node.children;
205 346 } else {
206 347 return child_node;
207 348 }
208 349 }
209 350 }
210 351
211 352 fn add_or_remove_file(
212 353 &mut self,
213 354 path: &HgPath,
214 355 old_state: EntryState,
215 356 new_entry: DirstateEntry,
216 357 ) {
217 358 let tracked_count_increment =
218 359 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
219 360 (false, true) => 1,
220 361 (true, false) => -1,
221 362 _ => 0,
222 363 };
223 364
224 365 let node = Self::get_or_insert_node(
225 366 &mut self.root,
226 367 path,
227 368 WithBasename::to_cow_owned,
228 369 |ancestor| {
229 370 // We can’t use `+= increment` because the counter is unsigned,
230 371 // and we want debug builds to detect accidental underflow
231 372 // through zero
232 373 match tracked_count_increment {
233 374 1 => ancestor.tracked_descendants_count += 1,
234 375 -1 => ancestor.tracked_descendants_count -= 1,
235 376 _ => {}
236 377 }
237 378 },
238 379 );
239 380 if node.entry.is_none() {
240 381 self.nodes_with_entry_count += 1
241 382 }
242 383 node.entry = Some(new_entry)
243 384 }
244 385
245 fn iter_nodes<'a>(
246 &'a self,
247 ) -> impl Iterator<Item = (&'a Cow<'on_disk, HgPath>, &'a Node)> + 'a {
386 fn iter_nodes<'tree>(
387 &'tree self,
388 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> + 'tree {
248 389 // Depth first tree traversal.
249 390 //
250 391 // If we could afford internal iteration and recursion,
251 392 // this would look like:
252 393 //
253 394 // ```
254 395 // fn traverse_children(
255 396 // children: &ChildNodes,
256 397 // each: &mut impl FnMut(&Node),
257 398 // ) {
258 399 // for child in children.values() {
259 400 // traverse_children(&child.children, each);
260 401 // each(child);
261 402 // }
262 403 // }
263 404 // ```
264 405 //
265 406 // However we want an external iterator and therefore can’t use the
266 407 // call stack. Use an explicit stack instead:
267 408 let mut stack = Vec::new();
268 let mut iter = self.root.iter();
409 let mut iter = self.root.as_ref().iter();
269 410 std::iter::from_fn(move || {
270 while let Some((key, child_node)) = iter.next() {
411 while let Some(child_node) = iter.next() {
271 412 // Pseudo-recursion
272 let new_iter = child_node.children.iter();
413 let new_iter = child_node.children().iter();
273 414 let old_iter = std::mem::replace(&mut iter, new_iter);
274 let key = key.full_path();
275 stack.push((key, child_node, old_iter));
415 stack.push((child_node, old_iter));
276 416 }
277 417 // Found the end of a `children.iter()` iterator.
278 if let Some((key, child_node, next_iter)) = stack.pop() {
418 if let Some((child_node, next_iter)) = stack.pop() {
279 419 // "Return" from pseudo-recursion by restoring state from the
280 420 // explicit stack
281 421 iter = next_iter;
282 422
283 Some((key, child_node))
423 Some(child_node)
284 424 } else {
285 425 // Reached the bottom of the stack, we’re done
286 426 None
287 427 }
288 428 })
289 429 }
290 430
291 431 fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) {
292 432 for path in paths {
293 433 if let Some(node) =
294 434 Self::get_node_mut(&mut self.root, path.as_ref())
295 435 {
296 436 if let Some(entry) = node.entry.as_mut() {
297 437 entry.clear_mtime();
298 438 }
299 439 }
300 440 }
301 441 }
302 442 }
303 443
304 444 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
305 445 fn clear(&mut self) {
306 self.root.clear();
446 self.root = Default::default();
307 447 self.nodes_with_entry_count = 0;
308 448 self.nodes_with_copy_source_count = 0;
309 449 }
310 450
311 451 fn add_file(
312 452 &mut self,
313 453 filename: &HgPath,
314 454 old_state: EntryState,
315 455 entry: DirstateEntry,
316 456 ) -> Result<(), DirstateMapError> {
317 457 self.add_or_remove_file(filename, old_state, entry);
318 458 Ok(())
319 459 }
320 460
321 461 fn remove_file(
322 462 &mut self,
323 463 filename: &HgPath,
324 464 old_state: EntryState,
325 465 size: i32,
326 466 ) -> Result<(), DirstateMapError> {
327 467 let entry = DirstateEntry {
328 468 state: EntryState::Removed,
329 469 mode: 0,
330 470 size,
331 471 mtime: 0,
332 472 };
333 473 self.add_or_remove_file(filename, old_state, entry);
334 474 Ok(())
335 475 }
336 476
337 477 fn drop_file(
338 478 &mut self,
339 479 filename: &HgPath,
340 480 old_state: EntryState,
341 481 ) -> Result<bool, DirstateMapError> {
342 482 struct Dropped {
343 483 was_tracked: bool,
344 484 had_entry: bool,
345 485 had_copy_source: bool,
346 486 }
347 487 fn recur(nodes: &mut ChildNodes, path: &HgPath) -> Option<Dropped> {
348 488 let (first_path_component, rest_of_path) =
349 489 path.split_first_component();
350 let node = nodes.get_mut(first_path_component)?;
490 let node = nodes.make_mut().get_mut(first_path_component)?;
351 491 let dropped;
352 492 if let Some(rest) = rest_of_path {
353 493 dropped = recur(&mut node.children, rest)?;
354 494 if dropped.was_tracked {
355 495 node.tracked_descendants_count -= 1;
356 496 }
357 497 } else {
358 498 dropped = Dropped {
359 499 was_tracked: node
360 500 .entry
361 501 .as_ref()
362 502 .map_or(false, |entry| entry.state.is_tracked()),
363 503 had_entry: node.entry.take().is_some(),
364 504 had_copy_source: node.copy_source.take().is_some(),
365 505 };
366 506 }
367 507 // After recursion, for both leaf (rest_of_path is None) nodes and
368 508 // parent nodes, remove a node if it just became empty.
369 509 if node.entry.is_none()
370 510 && node.copy_source.is_none()
371 511 && node.children.is_empty()
372 512 {
373 nodes.remove(first_path_component);
513 nodes.make_mut().remove(first_path_component);
374 514 }
375 515 Some(dropped)
376 516 }
377 517
378 518 if let Some(dropped) = recur(&mut self.root, filename) {
379 519 if dropped.had_entry {
380 520 self.nodes_with_entry_count -= 1
381 521 }
382 522 if dropped.had_copy_source {
383 523 self.nodes_with_copy_source_count -= 1
384 524 }
385 525 Ok(dropped.had_entry)
386 526 } else {
387 527 debug_assert!(!old_state.is_tracked());
388 528 Ok(false)
389 529 }
390 530 }
391 531
392 532 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
393 533 for filename in filenames {
394 534 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
395 535 if let Some(entry) = node.entry.as_mut() {
396 536 entry.clear_ambiguous_mtime(now);
397 537 }
398 538 }
399 539 }
400 540 }
401 541
402 542 fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool {
403 543 self.get_node(key)
404 .and_then(|node| node.entry.as_ref())
405 .map_or(false, DirstateEntry::is_non_normal)
544 .and_then(|node| node.entry())
545 .map_or(false, |entry| entry.is_non_normal())
406 546 }
407 547
408 548 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
409 549 // Do nothing, this `DirstateMap` does not have a separate "non normal
410 550 // entries" set that needs to be kept up to date
411 551 }
412 552
413 553 fn non_normal_or_other_parent_paths(
414 554 &mut self,
415 555 ) -> Box<dyn Iterator<Item = &HgPath> + '_> {
416 Box::new(self.iter_nodes().filter_map(|(path, node)| {
417 node.entry
418 .as_ref()
556 Box::new(self.iter_nodes().filter_map(|node| {
557 node.entry()
419 558 .filter(|entry| {
420 559 entry.is_non_normal() || entry.is_from_other_parent()
421 560 })
422 .map(|_| &**path)
561 .map(|_| node.full_path())
423 562 }))
424 563 }
425 564
426 565 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
427 566 // Do nothing, this `DirstateMap` does not have a separate "non normal
428 567 // entries" and "from other parent" sets that need to be recomputed
429 568 }
430 569
431 570 fn iter_non_normal_paths(
432 571 &mut self,
433 572 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
434 573 self.iter_non_normal_paths_panic()
435 574 }
436 575
437 576 fn iter_non_normal_paths_panic(
438 577 &self,
439 578 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
440 Box::new(self.iter_nodes().filter_map(|(path, node)| {
441 node.entry
442 .as_ref()
579 Box::new(self.iter_nodes().filter_map(|node| {
580 node.entry()
443 581 .filter(|entry| entry.is_non_normal())
444 .map(|_| &**path)
582 .map(|_| node.full_path())
445 583 }))
446 584 }
447 585
448 586 fn iter_other_parent_paths(
449 587 &mut self,
450 588 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
451 Box::new(self.iter_nodes().filter_map(|(path, node)| {
452 node.entry
453 .as_ref()
589 Box::new(self.iter_nodes().filter_map(|node| {
590 node.entry()
454 591 .filter(|entry| entry.is_from_other_parent())
455 .map(|_| &**path)
592 .map(|_| node.full_path())
456 593 }))
457 594 }
458 595
459 596 fn has_tracked_dir(
460 597 &mut self,
461 598 directory: &HgPath,
462 599 ) -> Result<bool, DirstateMapError> {
463 600 if let Some(node) = self.get_node(directory) {
464 601 // A node without a `DirstateEntry` was created to hold child
465 602 // nodes, and is therefore a directory.
466 Ok(node.entry.is_none() && node.tracked_descendants_count > 0)
603 Ok(!node.has_entry() && node.tracked_descendants_count() > 0)
467 604 } else {
468 605 Ok(false)
469 606 }
470 607 }
471 608
472 609 fn has_dir(
473 610 &mut self,
474 611 directory: &HgPath,
475 612 ) -> Result<bool, DirstateMapError> {
476 613 if let Some(node) = self.get_node(directory) {
477 614 // A node without a `DirstateEntry` was created to hold child
478 615 // nodes, and is therefore a directory.
479 Ok(node.entry.is_none())
616 Ok(!node.has_entry())
480 617 } else {
481 618 Ok(false)
482 619 }
483 620 }
484 621
485 622 #[timed]
486 623 fn pack_v1(
487 624 &mut self,
488 625 parents: DirstateParents,
489 626 now: Timestamp,
490 627 ) -> Result<Vec<u8>, DirstateError> {
491 628 let now: i32 = now.0.try_into().expect("time overflow");
492 629 let mut ambiguous_mtimes = Vec::new();
493 630 // Optimization (to be measured?): pre-compute size to avoid `Vec`
494 631 // reallocations
495 632 let mut size = parents.as_bytes().len();
496 for (path, node) in self.iter_nodes() {
497 if let Some(entry) = &node.entry {
498 size += packed_entry_size(
499 path,
500 node.copy_source.as_ref().map(|p| &**p),
501 );
633 for node in self.iter_nodes() {
634 if let Some(entry) = node.entry() {
635 size +=
636 packed_entry_size(node.full_path(), node.copy_source());
502 637 if entry.mtime_is_ambiguous(now) {
503 ambiguous_mtimes.push(path.clone())
638 ambiguous_mtimes.push(node.full_path_cow())
504 639 }
505 640 }
506 641 }
507 642 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes);
508 643
509 644 let mut packed = Vec::with_capacity(size);
510 645 packed.extend(parents.as_bytes());
511 646
512 for (path, node) in self.iter_nodes() {
513 if let Some(entry) = &node.entry {
647 for node in self.iter_nodes() {
648 if let Some(entry) = node.entry() {
514 649 pack_entry(
515 path,
516 entry,
517 node.copy_source.as_ref().map(|p| &**p),
650 node.full_path(),
651 &entry,
652 node.copy_source(),
518 653 &mut packed,
519 654 );
520 655 }
521 656 }
522 657 Ok(packed)
523 658 }
524 659
525 660 #[timed]
526 661 fn pack_v2(
527 662 &mut self,
528 663 parents: DirstateParents,
529 664 now: Timestamp,
530 665 ) -> Result<Vec<u8>, DirstateError> {
531 666 // TODO: how do we want to handle this in 2038?
532 667 let now: i32 = now.0.try_into().expect("time overflow");
533 668 let mut paths = Vec::new();
534 for (path, node) in self.iter_nodes() {
535 if let Some(entry) = &node.entry {
669 for node in self.iter_nodes() {
670 if let Some(entry) = node.entry() {
536 671 if entry.mtime_is_ambiguous(now) {
537 paths.push(path.clone())
672 paths.push(node.full_path_cow())
538 673 }
539 674 }
540 675 }
541 676 // Borrow of `self` ends here since we collect cloned paths
542 677
543 678 self.clear_known_ambiguous_mtimes(&paths);
544 679
545 680 on_disk::write(self, parents)
546 681 }
547 682
548 683 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
549 684 // Do nothing, this `DirstateMap` does not have a separate `all_dirs` that
550 685 // needs to be recomputed
551 686 Ok(())
552 687 }
553 688
554 689 fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
555 690 // Do nothing, this `DirstateMap` does not have a separate `dirs` that needs
556 691 // to be recomputed
557 692 Ok(())
558 693 }
559 694
560 695 fn status<'a>(
561 696 &'a mut self,
562 697 matcher: &'a (dyn Matcher + Sync),
563 698 root_dir: PathBuf,
564 699 ignore_files: Vec<PathBuf>,
565 700 options: StatusOptions,
566 701 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
567 702 {
568 703 super::status::status(self, matcher, root_dir, ignore_files, options)
569 704 }
570 705
571 706 fn copy_map_len(&self) -> usize {
572 707 self.nodes_with_copy_source_count as usize
573 708 }
574 709
575 710 fn copy_map_iter(&self) -> CopyMapIter<'_> {
576 Box::new(self.iter_nodes().filter_map(|(path, node)| {
577 node.copy_source
578 .as_ref()
579 .map(|copy_source| (&**path, &**copy_source))
711 Box::new(self.iter_nodes().filter_map(|node| {
712 node.copy_source()
713 .map(|copy_source| (node.full_path(), copy_source))
580 714 }))
581 715 }
582 716
583 717 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
584 718 if let Some(node) = self.get_node(key) {
585 node.copy_source.is_some()
719 node.copy_source().is_some()
586 720 } else {
587 721 false
588 722 }
589 723 }
590 724
591 725 fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> {
592 self.get_node(key)?.copy_source.as_ref().map(|p| &**p)
726 self.get_node(key)?.copy_source()
593 727 }
594 728
595 729 fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> {
596 730 let count = &mut self.nodes_with_copy_source_count;
597 731 Self::get_node_mut(&mut self.root, key).and_then(|node| {
598 732 if node.copy_source.is_some() {
599 733 *count -= 1
600 734 }
601 735 node.copy_source.take().map(Cow::into_owned)
602 736 })
603 737 }
604 738
605 739 fn copy_map_insert(
606 740 &mut self,
607 741 key: HgPathBuf,
608 742 value: HgPathBuf,
609 743 ) -> Option<HgPathBuf> {
610 744 let node = Self::get_or_insert_node(
611 745 &mut self.root,
612 746 &key,
613 747 WithBasename::to_cow_owned,
614 748 |_ancestor| {},
615 749 );
616 750 if node.copy_source.is_none() {
617 751 self.nodes_with_copy_source_count += 1
618 752 }
619 753 node.copy_source.replace(value.into()).map(Cow::into_owned)
620 754 }
621 755
622 756 fn len(&self) -> usize {
623 757 self.nodes_with_entry_count as usize
624 758 }
625 759
626 760 fn contains_key(&self, key: &HgPath) -> bool {
627 761 self.get(key).is_some()
628 762 }
629 763
630 764 fn get(&self, key: &HgPath) -> Option<DirstateEntry> {
631 self.get_node(key)?.entry
765 self.get_node(key)?.entry()
632 766 }
633 767
634 768 fn iter(&self) -> StateMapIter<'_> {
635 Box::new(self.iter_nodes().filter_map(|(path, node)| {
636 node.entry.map(|entry| (&**path, entry))
769 Box::new(self.iter_nodes().filter_map(|node| {
770 node.entry().map(|entry| (node.full_path(), entry))
637 771 }))
638 772 }
639 773 }
@@ -1,326 +1,331 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! # File format
4 4 //!
5 5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 7 //! the nodes representing the files and directories at the root of the
8 8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 10 //! nodes (if any) for nested files and directories.
11 11
12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap};
12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
13 13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 14 use crate::errors::HgError;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateParents;
19 19 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
20 20 use bytes_cast::BytesCast;
21 21 use std::borrow::Cow;
22 22 use std::convert::{TryFrom, TryInto};
23 23
24 24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 25 /// This is a redundant sanity check more than an actual "magic number" since
26 26 /// `.hg/requires` already governs which format should be used.
27 27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28 28
29 29 #[derive(BytesCast)]
30 30 #[repr(C)]
31 31 struct Header {
32 32 marker: [u8; V2_FORMAT_MARKER.len()],
33 33
34 34 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
35 35 /// `parents` field being at this offset, immediately after `marker`.
36 36 parents: DirstateParents,
37 37
38 38 root: ChildNodes,
39 39 nodes_with_entry_count: Size,
40 40 nodes_with_copy_source_count: Size,
41 41 }
42 42
43 43 #[derive(BytesCast)]
44 44 #[repr(C)]
45 45 struct Node {
46 46 full_path: PathSlice,
47 47
48 48 /// In bytes from `self.full_path.start`
49 49 base_name_start: Size,
50 50
51 51 copy_source: OptPathSlice,
52 52 entry: OptEntry,
53 53 children: ChildNodes,
54 54 tracked_descendants_count: Size,
55 55 }
56 56
57 57 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
58 58 /// format
59 59 #[derive(BytesCast)]
60 60 #[repr(C)]
61 61 struct OptEntry {
62 62 state: u8,
63 63 mode: I32Be,
64 64 mtime: I32Be,
65 65 size: I32Be,
66 66 }
67 67
68 68 /// Counted in bytes from the start of the file
69 69 ///
70 70 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
71 71 /// we could save space by using `U32Be` instead.
72 72 type Offset = U64Be;
73 73
74 74 /// Counted in number of items
75 75 ///
76 76 /// NOTE: not supporting directories with more than 4 billion direct children,
77 77 /// or filenames more than 4 GiB.
78 78 type Size = U32Be;
79 79
80 80 /// Location of consecutive, fixed-size items.
81 81 ///
82 82 /// An item can be a single byte for paths, or a struct with
83 83 /// `derive(BytesCast)`.
84 84 #[derive(BytesCast, Copy, Clone)]
85 85 #[repr(C)]
86 86 struct Slice {
87 87 start: Offset,
88 88 len: Size,
89 89 }
90 90
91 91 /// A contiguous sequence of `len` times `Node`, representing the child nodes
92 92 /// of either some other node or of the repository root.
93 93 ///
94 94 /// Always sorted by ascending `full_path`, to allow binary search.
95 95 /// Since nodes with the same parent nodes also have the same parent path,
96 96 /// only the `base_name`s need to be compared during binary search.
97 97 type ChildNodes = Slice;
98 98
99 99 /// A `HgPath` of `len` bytes
100 100 type PathSlice = Slice;
101 101
102 102 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
103 103 type OptPathSlice = Slice;
104 104
105 105 /// Make sure that size-affecting changes are made knowingly
106 106 fn _static_assert_size_of() {
107 107 let _ = std::mem::transmute::<Header, [u8; 72]>;
108 108 let _ = std::mem::transmute::<Node, [u8; 57]>;
109 109 }
110 110
111 111 pub(super) fn read<'on_disk>(
112 112 on_disk: &'on_disk [u8],
113 113 ) -> Result<(DirstateMap<'on_disk>, Option<DirstateParents>), DirstateError> {
114 114 if on_disk.is_empty() {
115 115 return Ok((DirstateMap::empty(on_disk), None));
116 116 }
117 117 let (header, _) = Header::from_bytes(on_disk)
118 118 .map_err(|_| HgError::corrupted("truncated dirstate-v2"))?;
119 119 let Header {
120 120 marker,
121 121 parents,
122 122 root,
123 123 nodes_with_entry_count,
124 124 nodes_with_copy_source_count,
125 125 } = header;
126 126 if marker != V2_FORMAT_MARKER {
127 127 return Err(HgError::corrupted("missing dirstated-v2 marker").into());
128 128 }
129 129 let dirstate_map = DirstateMap {
130 130 on_disk,
131 131 root: read_nodes(on_disk, *root)?,
132 132 nodes_with_entry_count: nodes_with_entry_count.get(),
133 133 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
134 134 };
135 135 let parents = Some(parents.clone());
136 136 Ok((dirstate_map, parents))
137 137 }
138 138
139 139 impl Node {
140 140 pub(super) fn path<'on_disk>(
141 141 &self,
142 142 on_disk: &'on_disk [u8],
143 143 ) -> Result<dirstate_map::NodeKey<'on_disk>, HgError> {
144 144 let full_path = read_hg_path(on_disk, self.full_path)?;
145 145 let base_name_start = usize::try_from(self.base_name_start.get())
146 146 // u32 -> usize, could only panic on a 16-bit CPU
147 147 .expect("dirstate-v2 base_name_start out of bounds");
148 148 if base_name_start < full_path.len() {
149 149 Ok(WithBasename::from_raw_parts(full_path, base_name_start))
150 150 } else {
151 151 Err(HgError::corrupted(
152 152 "dirstate-v2 base_name_start out of bounds",
153 153 ))
154 154 }
155 155 }
156 156
157 157 pub(super) fn copy_source<'on_disk>(
158 158 &self,
159 159 on_disk: &'on_disk [u8],
160 160 ) -> Result<Option<Cow<'on_disk, HgPath>>, HgError> {
161 161 Ok(if self.copy_source.start.get() != 0 {
162 162 Some(read_hg_path(on_disk, self.copy_source)?)
163 163 } else {
164 164 None
165 165 })
166 166 }
167 167
168 168 pub(super) fn entry(&self) -> Result<Option<DirstateEntry>, HgError> {
169 169 Ok(if self.entry.state != b'\0' {
170 170 Some(DirstateEntry {
171 171 state: self.entry.state.try_into()?,
172 172 mode: self.entry.mode.get(),
173 173 mtime: self.entry.mtime.get(),
174 174 size: self.entry.size.get(),
175 175 })
176 176 } else {
177 177 None
178 178 })
179 179 }
180 180
181 181 pub(super) fn to_in_memory_node<'on_disk>(
182 182 &self,
183 183 on_disk: &'on_disk [u8],
184 184 ) -> Result<dirstate_map::Node<'on_disk>, HgError> {
185 185 Ok(dirstate_map::Node {
186 186 children: read_nodes(on_disk, self.children)?,
187 187 copy_source: self.copy_source(on_disk)?,
188 188 entry: self.entry()?,
189 189 tracked_descendants_count: self.tracked_descendants_count.get(),
190 190 })
191 191 }
192 192 }
193 193
194 194 fn read_nodes(
195 195 on_disk: &[u8],
196 196 slice: ChildNodes,
197 197 ) -> Result<dirstate_map::ChildNodes, HgError> {
198 198 read_slice::<Node>(on_disk, slice)?
199 199 .iter()
200 200 .map(|node| {
201 201 Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?))
202 202 })
203 .collect()
203 .collect::<Result<_, _>>()
204 .map(dirstate_map::ChildNodes::InMemory)
204 205 }
205 206
206 207 fn read_hg_path(on_disk: &[u8], slice: Slice) -> Result<Cow<HgPath>, HgError> {
207 208 let bytes = read_slice::<u8>(on_disk, slice)?;
208 209 Ok(Cow::Borrowed(HgPath::new(bytes)))
209 210 }
210 211
211 212 fn read_slice<T>(on_disk: &[u8], slice: Slice) -> Result<&[T], HgError>
212 213 where
213 214 T: BytesCast,
214 215 {
215 216 // Either `usize::MAX` would result in an "out of bounds" error since a single
216 217 // `&[u8]` cannot occupy the entire address space.
217 218 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
218 219 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
219 220 on_disk
220 221 .get(start..)
221 222 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
222 223 .map(|(slice, _rest)| slice)
223 224 .ok_or_else(|| {
224 225 HgError::corrupted("dirstate v2 slice is out of bounds")
225 226 })
226 227 }
227 228
228 229 pub(super) fn write(
229 230 dirstate_map: &mut DirstateMap,
230 231 parents: DirstateParents,
231 232 ) -> Result<Vec<u8>, DirstateError> {
232 233 let header_len = std::mem::size_of::<Header>();
233 234
234 235 // This ignores the space for paths, and for nodes without an entry.
235 236 // TODO: better estimate? Skip the `Vec` and write to a file directly?
236 237 let size_guess = header_len
237 238 + std::mem::size_of::<Node>()
238 239 * dirstate_map.nodes_with_entry_count as usize;
239 240 let mut out = Vec::with_capacity(size_guess);
240 241
241 242 // Keep space for the header. We’ll fill it out at the end when we know the
242 243 // actual offset for the root nodes.
243 244 out.resize(header_len, 0_u8);
244 245
245 let root = write_nodes(&mut dirstate_map.root, &mut out)?;
246 let root = write_nodes(dirstate_map.root.as_ref(), &mut out)?;
246 247
247 248 let header = Header {
248 249 marker: *V2_FORMAT_MARKER,
249 250 parents: parents,
250 251 root,
251 252 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
252 253 nodes_with_copy_source_count: dirstate_map
253 254 .nodes_with_copy_source_count
254 255 .into(),
255 256 };
256 257 out[..header_len].copy_from_slice(header.as_bytes());
257 258 Ok(out)
258 259 }
259 260
260 261 fn write_nodes(
261 nodes: &dirstate_map::ChildNodes,
262 nodes: dirstate_map::ChildNodesRef,
262 263 out: &mut Vec<u8>,
263 264 ) -> Result<ChildNodes, DirstateError> {
264 265 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
265 266 // order. Sort to enable binary search in the written file.
266 let nodes = dirstate_map::Node::sorted(nodes);
267 let nodes = nodes.sorted();
267 268
268 269 // First accumulate serialized nodes in a `Vec`
269 270 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
270 for (full_path, node) in nodes {
271 on_disk_nodes.push(Node {
272 children: write_nodes(&node.children, out)?,
273 tracked_descendants_count: node.tracked_descendants_count.into(),
274 full_path: write_slice::<u8>(
275 full_path.full_path().as_bytes(),
276 out,
277 ),
278 base_name_start: u32::try_from(full_path.base_name_start())
279 // Could only panic for paths over 4 GiB
280 .expect("dirstate-v2 offset overflow")
281 .into(),
282 copy_source: if let Some(source) = &node.copy_source {
283 write_slice::<u8>(source.as_bytes(), out)
284 } else {
285 Slice {
286 start: 0.into(),
287 len: 0.into(),
288 }
289 },
290 entry: if let Some(entry) = &node.entry {
291 OptEntry {
292 state: entry.state.into(),
293 mode: entry.mode.into(),
294 mtime: entry.mtime.into(),
295 size: entry.size.into(),
296 }
297 } else {
298 OptEntry {
299 state: b'\0',
300 mode: 0.into(),
301 mtime: 0.into(),
302 size: 0.into(),
303 }
271 for node in nodes {
272 let children = write_nodes(node.children(), out)?;
273 let full_path = write_slice::<u8>(node.full_path().as_bytes(), out);
274 let copy_source = if let Some(source) = node.copy_source() {
275 write_slice::<u8>(source.as_bytes(), out)
276 } else {
277 Slice {
278 start: 0.into(),
279 len: 0.into(),
280 }
281 };
282 on_disk_nodes.push(match node {
283 NodeRef::InMemory(path, node) => Node {
284 children,
285 copy_source,
286 full_path,
287 base_name_start: u32::try_from(path.base_name_start())
288 // Could only panic for paths over 4 GiB
289 .expect("dirstate-v2 offset overflow")
290 .into(),
291 tracked_descendants_count: node
292 .tracked_descendants_count
293 .into(),
294 entry: if let Some(entry) = &node.entry {
295 OptEntry {
296 state: entry.state.into(),
297 mode: entry.mode.into(),
298 mtime: entry.mtime.into(),
299 size: entry.size.into(),
300 }
301 } else {
302 OptEntry {
303 state: b'\0',
304 mode: 0.into(),
305 mtime: 0.into(),
306 size: 0.into(),
307 }
308 },
304 309 },
305 310 })
306 311 }
307 312 // … so we can write them contiguously
308 313 Ok(write_slice::<Node>(&on_disk_nodes, out))
309 314 }
310 315
311 316 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
312 317 where
313 318 T: BytesCast,
314 319 {
315 320 let start = u64::try_from(out.len())
316 321 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
317 322 .expect("dirstate-v2 offset overflow")
318 323 .into();
319 324 let len = u32::try_from(slice.len())
320 325 // Could only panic for paths over 4 GiB or nodes with over 4 billion
321 326 // child nodes
322 327 .expect("dirstate-v2 offset overflow")
323 328 .into();
324 329 out.extend(slice.as_bytes());
325 330 Slice { start, len }
326 331 }
@@ -1,426 +1,413 b''
1 1 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate_tree::dirstate_map::ChildNodes;
2 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
3 3 use crate::dirstate_tree::dirstate_map::DirstateMap;
4 use crate::dirstate_tree::dirstate_map::Node;
4 use crate::dirstate_tree::dirstate_map::NodeRef;
5 5 use crate::matchers::get_ignore_function;
6 6 use crate::matchers::Matcher;
7 7 use crate::utils::files::get_bytes_from_os_string;
8 8 use crate::utils::hg_path::HgPath;
9 9 use crate::BadMatch;
10 10 use crate::DirstateStatus;
11 11 use crate::EntryState;
12 12 use crate::HgPathBuf;
13 13 use crate::PatternFileWarning;
14 14 use crate::StatusError;
15 15 use crate::StatusOptions;
16 16 use micro_timer::timed;
17 17 use rayon::prelude::*;
18 18 use std::borrow::Cow;
19 19 use std::io;
20 20 use std::path::Path;
21 21 use std::path::PathBuf;
22 22 use std::sync::Mutex;
23 23
24 24 /// Returns the status of the working directory compared to its parent
25 25 /// changeset.
26 26 ///
27 27 /// This algorithm is based on traversing the filesystem tree (`fs` in function
28 28 /// and variable names) and dirstate tree at the same time. The core of this
29 29 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
30 30 /// and its use of `itertools::merge_join_by`. When reaching a path that only
31 31 /// exists in one of the two trees, depending on information requested by
32 32 /// `options` we may need to traverse the remaining subtree.
33 33 #[timed]
34 34 pub fn status<'tree>(
35 35 dmap: &'tree mut DirstateMap,
36 36 matcher: &(dyn Matcher + Sync),
37 37 root_dir: PathBuf,
38 38 ignore_files: Vec<PathBuf>,
39 39 options: StatusOptions,
40 40 ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
41 41 let (ignore_fn, warnings): (IgnoreFnType, _) =
42 42 if options.list_ignored || options.list_unknown {
43 43 get_ignore_function(ignore_files, &root_dir)?
44 44 } else {
45 45 (Box::new(|&_| true), vec![])
46 46 };
47 47
48 48 let common = StatusCommon {
49 49 options,
50 50 matcher,
51 51 ignore_fn,
52 52 outcome: Mutex::new(DirstateStatus::default()),
53 53 };
54 54 let is_at_repo_root = true;
55 55 let hg_path = HgPath::new("");
56 56 let has_ignored_ancestor = false;
57 57 common.traverse_fs_directory_and_dirstate(
58 58 has_ignored_ancestor,
59 &dmap.root,
59 dmap.root.as_ref(),
60 60 hg_path,
61 61 &root_dir,
62 62 is_at_repo_root,
63 63 );
64 64 Ok((common.outcome.into_inner().unwrap(), warnings))
65 65 }
66 66
67 67 /// Bag of random things needed by various parts of the algorithm. Reduces the
68 68 /// number of parameters passed to functions.
69 69 struct StatusCommon<'tree, 'a> {
70 70 options: StatusOptions,
71 71 matcher: &'a (dyn Matcher + Sync),
72 72 ignore_fn: IgnoreFnType<'a>,
73 73 outcome: Mutex<DirstateStatus<'tree>>,
74 74 }
75 75
76 76 impl<'tree, 'a> StatusCommon<'tree, 'a> {
77 77 fn read_dir(
78 78 &self,
79 79 hg_path: &HgPath,
80 80 fs_path: &Path,
81 81 is_at_repo_root: bool,
82 82 ) -> Result<Vec<DirEntry>, ()> {
83 83 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
84 84 let errno = error.raw_os_error().expect("expected real OS error");
85 85 self.outcome
86 86 .lock()
87 87 .unwrap()
88 88 .bad
89 89 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
90 90 })
91 91 }
92 92
93 93 fn traverse_fs_directory_and_dirstate(
94 94 &self,
95 95 has_ignored_ancestor: bool,
96 dirstate_nodes: &'tree ChildNodes,
96 dirstate_nodes: ChildNodesRef<'tree, '_>,
97 97 directory_hg_path: &'tree HgPath,
98 98 directory_fs_path: &Path,
99 99 is_at_repo_root: bool,
100 100 ) {
101 101 let mut fs_entries = if let Ok(entries) = self.read_dir(
102 102 directory_hg_path,
103 103 directory_fs_path,
104 104 is_at_repo_root,
105 105 ) {
106 106 entries
107 107 } else {
108 108 return;
109 109 };
110 110
111 111 // `merge_join_by` requires both its input iterators to be sorted:
112 112
113 let dirstate_nodes = Node::sorted(dirstate_nodes);
113 let dirstate_nodes = dirstate_nodes.sorted();
114 114 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
115 115 // https://github.com/rust-lang/rust/issues/34162
116 116 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
117 117
118 118 itertools::merge_join_by(
119 119 dirstate_nodes,
120 120 &fs_entries,
121 |(full_path, _node), fs_entry| {
122 full_path.base_name().cmp(&fs_entry.base_name)
121 |dirstate_node, fs_entry| {
122 dirstate_node.base_name().cmp(&fs_entry.base_name)
123 123 },
124 124 )
125 125 .par_bridge()
126 126 .for_each(|pair| {
127 127 use itertools::EitherOrBoth::*;
128 128 match pair {
129 Both((hg_path, dirstate_node), fs_entry) => {
129 Both(dirstate_node, fs_entry) => {
130 130 self.traverse_fs_and_dirstate(
131 131 fs_entry,
132 hg_path.full_path(),
133 132 dirstate_node,
134 133 has_ignored_ancestor,
135 134 );
136 135 }
137 Left((hg_path, dirstate_node)) => self.traverse_dirstate_only(
138 hg_path.full_path(),
139 dirstate_node,
140 ),
136 Left(dirstate_node) => {
137 self.traverse_dirstate_only(dirstate_node)
138 }
141 139 Right(fs_entry) => self.traverse_fs_only(
142 140 has_ignored_ancestor,
143 141 directory_hg_path,
144 142 fs_entry,
145 143 ),
146 144 }
147 145 })
148 146 }
149 147
150 148 fn traverse_fs_and_dirstate(
151 149 &self,
152 150 fs_entry: &DirEntry,
153 hg_path: &'tree HgPath,
154 dirstate_node: &'tree Node,
151 dirstate_node: NodeRef<'tree, '_>,
155 152 has_ignored_ancestor: bool,
156 153 ) {
154 let hg_path = dirstate_node.full_path();
157 155 let file_type = fs_entry.metadata.file_type();
158 156 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
159 157 if !file_or_symlink {
160 158 // If we previously had a file here, it was removed (with
161 159 // `hg rm` or similar) or deleted before it could be
162 160 // replaced by a directory or something else.
163 161 self.mark_removed_or_deleted_if_file(
164 hg_path,
162 dirstate_node.full_path(),
165 163 dirstate_node.state(),
166 164 );
167 165 }
168 166 if file_type.is_dir() {
169 167 if self.options.collect_traversed_dirs {
170 168 self.outcome.lock().unwrap().traversed.push(hg_path.into())
171 169 }
172 170 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
173 171 let is_at_repo_root = false;
174 172 self.traverse_fs_directory_and_dirstate(
175 173 is_ignored,
176 &dirstate_node.children,
174 dirstate_node.children(),
177 175 hg_path,
178 176 &fs_entry.full_path,
179 177 is_at_repo_root,
180 178 );
181 179 } else {
182 180 if file_or_symlink && self.matcher.matches(hg_path) {
183 181 let full_path = Cow::from(hg_path);
184 if let Some(entry) = &dirstate_node.entry {
185 match entry.state {
182 if let Some(state) = dirstate_node.state() {
183 match state {
186 184 EntryState::Added => {
187 185 self.outcome.lock().unwrap().added.push(full_path)
188 186 }
189 187 EntryState::Removed => self
190 188 .outcome
191 189 .lock()
192 190 .unwrap()
193 191 .removed
194 192 .push(full_path),
195 193 EntryState::Merged => self
196 194 .outcome
197 195 .lock()
198 196 .unwrap()
199 197 .modified
200 198 .push(full_path),
201 199 EntryState::Normal => {
202 self.handle_normal_file(
203 full_path,
204 dirstate_node,
205 entry,
206 fs_entry,
207 );
200 self.handle_normal_file(&dirstate_node, fs_entry);
208 201 }
209 202 // This variant is not used in DirstateMap
210 203 // nodes
211 204 EntryState::Unknown => unreachable!(),
212 205 }
213 206 } else {
214 207 // `node.entry.is_none()` indicates a "directory"
215 208 // node, but the filesystem has a file
216 209 self.mark_unknown_or_ignored(
217 210 has_ignored_ancestor,
218 211 full_path,
219 212 )
220 213 }
221 214 }
222 215
223 for (child_hg_path, child_node) in &dirstate_node.children {
224 self.traverse_dirstate_only(
225 child_hg_path.full_path(),
226 child_node,
227 )
216 for child_node in dirstate_node.children().iter() {
217 self.traverse_dirstate_only(child_node)
228 218 }
229 219 }
230 220 }
231 221
232 222 /// A file with `EntryState::Normal` in the dirstate was found in the
233 223 /// filesystem
234 224 fn handle_normal_file(
235 225 &self,
236 full_path: Cow<'tree, HgPath>,
237 dirstate_node: &Node,
238 entry: &crate::DirstateEntry,
226 dirstate_node: &NodeRef<'tree, '_>,
239 227 fs_entry: &DirEntry,
240 228 ) {
241 229 // Keep the low 31 bits
242 230 fn truncate_u64(value: u64) -> i32 {
243 231 (value & 0x7FFF_FFFF) as i32
244 232 }
245 233 fn truncate_i64(value: i64) -> i32 {
246 234 (value & 0x7FFF_FFFF) as i32
247 235 }
248 236
237 let entry = dirstate_node
238 .entry()
239 .expect("handle_normal_file called with entry-less node");
240 let full_path = Cow::from(dirstate_node.full_path());
249 241 let mode_changed = || {
250 242 self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
251 243 };
252 244 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
253 245 if entry.size >= 0
254 246 && size_changed
255 247 && fs_entry.metadata.file_type().is_symlink()
256 248 {
257 249 // issue6456: Size returned may be longer due to encryption
258 250 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
259 251 self.outcome.lock().unwrap().unsure.push(full_path)
260 } else if dirstate_node.copy_source.is_some()
252 } else if dirstate_node.copy_source().is_some()
261 253 || entry.is_from_other_parent()
262 254 || (entry.size >= 0 && (size_changed || mode_changed()))
263 255 {
264 256 self.outcome.lock().unwrap().modified.push(full_path)
265 257 } else {
266 258 let mtime = mtime_seconds(&fs_entry.metadata);
267 259 if truncate_i64(mtime) != entry.mtime
268 260 || mtime == self.options.last_normal_time
269 261 {
270 262 self.outcome.lock().unwrap().unsure.push(full_path)
271 263 } else if self.options.list_clean {
272 264 self.outcome.lock().unwrap().clean.push(full_path)
273 265 }
274 266 }
275 267 }
276 268
277 269 /// A node in the dirstate tree has no corresponding filesystem entry
278 fn traverse_dirstate_only(
279 &self,
280 hg_path: &'tree HgPath,
281 dirstate_node: &'tree Node,
282 ) {
283 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state());
284 dirstate_node.children.par_iter().for_each(
285 |(child_hg_path, child_node)| {
286 self.traverse_dirstate_only(
287 child_hg_path.full_path(),
288 child_node,
289 )
290 },
291 )
270 fn traverse_dirstate_only(&self, dirstate_node: NodeRef<'tree, '_>) {
271 self.mark_removed_or_deleted_if_file(
272 dirstate_node.full_path(),
273 dirstate_node.state(),
274 );
275 dirstate_node
276 .children()
277 .par_iter()
278 .for_each(|child_node| self.traverse_dirstate_only(child_node))
292 279 }
293 280
294 281 /// A node in the dirstate tree has no corresponding *file* on the
295 282 /// filesystem
296 283 ///
297 284 /// Does nothing on a "directory" node
298 285 fn mark_removed_or_deleted_if_file(
299 286 &self,
300 287 hg_path: &'tree HgPath,
301 288 dirstate_node_state: Option<EntryState>,
302 289 ) {
303 290 if let Some(state) = dirstate_node_state {
304 291 if self.matcher.matches(hg_path) {
305 292 if let EntryState::Removed = state {
306 293 self.outcome.lock().unwrap().removed.push(hg_path.into())
307 294 } else {
308 295 self.outcome.lock().unwrap().deleted.push(hg_path.into())
309 296 }
310 297 }
311 298 }
312 299 }
313 300
314 301 /// Something in the filesystem has no corresponding dirstate node
315 302 fn traverse_fs_only(
316 303 &self,
317 304 has_ignored_ancestor: bool,
318 305 directory_hg_path: &HgPath,
319 306 fs_entry: &DirEntry,
320 307 ) {
321 308 let hg_path = directory_hg_path.join(&fs_entry.base_name);
322 309 let file_type = fs_entry.metadata.file_type();
323 310 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
324 311 if file_type.is_dir() {
325 312 let is_ignored =
326 313 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
327 314 let traverse_children = if is_ignored {
328 315 // Descendants of an ignored directory are all ignored
329 316 self.options.list_ignored
330 317 } else {
331 318 // Descendants of an unknown directory may be either unknown or
332 319 // ignored
333 320 self.options.list_unknown || self.options.list_ignored
334 321 };
335 322 if traverse_children {
336 323 let is_at_repo_root = false;
337 324 if let Ok(children_fs_entries) = self.read_dir(
338 325 &hg_path,
339 326 &fs_entry.full_path,
340 327 is_at_repo_root,
341 328 ) {
342 329 children_fs_entries.par_iter().for_each(|child_fs_entry| {
343 330 self.traverse_fs_only(
344 331 is_ignored,
345 332 &hg_path,
346 333 child_fs_entry,
347 334 )
348 335 })
349 336 }
350 337 }
351 338 if self.options.collect_traversed_dirs {
352 339 self.outcome.lock().unwrap().traversed.push(hg_path.into())
353 340 }
354 341 } else if file_or_symlink && self.matcher.matches(&hg_path) {
355 342 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
356 343 }
357 344 }
358 345
359 346 fn mark_unknown_or_ignored(
360 347 &self,
361 348 has_ignored_ancestor: bool,
362 349 hg_path: Cow<'tree, HgPath>,
363 350 ) {
364 351 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
365 352 if is_ignored {
366 353 if self.options.list_ignored {
367 354 self.outcome.lock().unwrap().ignored.push(hg_path)
368 355 }
369 356 } else {
370 357 if self.options.list_unknown {
371 358 self.outcome.lock().unwrap().unknown.push(hg_path)
372 359 }
373 360 }
374 361 }
375 362 }
376 363
377 364 #[cfg(unix)] // TODO
378 365 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
379 366 // Going through `Metadata::modified()` would be portable, but would take
380 367 // care to construct a `SystemTime` value with sub-second precision just
381 368 // for us to throw that away here.
382 369 use std::os::unix::fs::MetadataExt;
383 370 metadata.mtime()
384 371 }
385 372
386 373 struct DirEntry {
387 374 base_name: HgPathBuf,
388 375 full_path: PathBuf,
389 376 metadata: std::fs::Metadata,
390 377 }
391 378
392 379 impl DirEntry {
393 380 /// Returns **unsorted** entries in the given directory, with name and
394 381 /// metadata.
395 382 ///
396 383 /// If a `.hg` sub-directory is encountered:
397 384 ///
398 385 /// * At the repository root, ignore that sub-directory
399 386 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
400 387 /// list instead.
401 388 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
402 389 let mut results = Vec::new();
403 390 for entry in path.read_dir()? {
404 391 let entry = entry?;
405 392 let metadata = entry.metadata()?;
406 393 let name = get_bytes_from_os_string(entry.file_name());
407 394 // FIXME don't do this when cached
408 395 if name == b".hg" {
409 396 if is_at_repo_root {
410 397 // Skip the repo’s own .hg (might be a symlink)
411 398 continue;
412 399 } else if metadata.is_dir() {
413 400 // A .hg sub-directory at another location means a subrepo,
414 401 // skip it entirely.
415 402 return Ok(Vec::new());
416 403 }
417 404 }
418 405 results.push(DirEntry {
419 406 base_name: name.into(),
420 407 full_path: entry.path(),
421 408 metadata,
422 409 })
423 410 }
424 411 Ok(results)
425 412 }
426 413 }
General Comments 0
You need to be logged in to leave comments. Login now