dirstate-tree: Downgrade `&mut Node` to `&Node` in status and serialization...
Simon Sapin
r48122:0252600f default
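The gist of the change: helpers that only read the dirstate tree now take shared `&` references instead of `&mut` (see the `Node::sorted` signature in the first hunk below). A minimal sketch of that downgrade, using a hypothetical stand-in `Node` and a plain `HashMap` rather than Mercurial's actual types:

```rust
use std::collections::HashMap;

/// Hypothetical stand-in for the dirstate `Node` type.
#[derive(Default, Debug)]
struct Node {
    tracked_descendants_count: u32,
}

/// Sorting only *reads* the map, so `&HashMap` and `iter()` are enough;
/// the previous version needlessly required `&mut` and `iter_mut()`.
fn sorted(nodes: &HashMap<String, Node>) -> Vec<(&String, &Node)> {
    let mut vec: Vec<_> = nodes.iter().collect();
    vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
    vec
}

fn main() {
    let mut nodes = HashMap::new();
    nodes.insert("b".to_owned(), Node { tracked_descendants_count: 1 });
    nodes.insert("a".to_owned(), Node::default());
    // Shared borrows can coexist, which an exclusive `&mut` borrow forbids.
    let first = sorted(&nodes);
    let second = sorted(&nodes);
    assert_eq!(first.len(), second.len());
    println!("{:?}", first);
}
```

Because nothing is mutated, several readers (such as a parallel status traversal) can hold these borrows at the same time.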
@@ -1,639 +1,639 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::convert::TryInto;
5 5 use std::path::PathBuf;
6 6
7 7 use super::on_disk;
8 8 use super::path_with_basename::WithBasename;
9 9 use crate::dirstate::parsers::pack_entry;
10 10 use crate::dirstate::parsers::packed_entry_size;
11 11 use crate::dirstate::parsers::parse_dirstate_entries;
12 12 use crate::dirstate::parsers::Timestamp;
13 13 use crate::matchers::Matcher;
14 14 use crate::utils::hg_path::{HgPath, HgPathBuf};
15 15 use crate::CopyMapIter;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateMapError;
19 19 use crate::DirstateParents;
20 20 use crate::DirstateStatus;
21 21 use crate::EntryState;
22 22 use crate::FastHashMap;
23 23 use crate::PatternFileWarning;
24 24 use crate::StateMapIter;
25 25 use crate::StatusError;
26 26 use crate::StatusOptions;
27 27
28 28 pub struct DirstateMap<'on_disk> {
29 29 /// Contents of the `.hg/dirstate` file
30 30 pub(super) on_disk: &'on_disk [u8],
31 31
32 32 pub(super) root: ChildNodes<'on_disk>,
33 33
34 34 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
35 35 pub(super) nodes_with_entry_count: u32,
36 36
37 37 /// Number of nodes anywhere in the tree that have
38 38 /// `.copy_source.is_some()`.
39 39 pub(super) nodes_with_copy_source_count: u32,
40 40 }
41 41
42 42 /// Using a plain `HgPathBuf` of the full path from the repository root as a
43 43 /// map key would also work: all paths in a given map have the same parent
44 44 /// path, so comparing full paths gives the same result as comparing base
45 45 /// names. However `HashMap` would waste time always re-hashing the same
46 46 /// string prefix.
47 47 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
48 48 pub(super) type ChildNodes<'on_disk> =
49 49 FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>;
50 50
51 51 /// Represents a file or a directory
52 52 #[derive(Default)]
53 53 pub(super) struct Node<'on_disk> {
54 54 /// `None` for directories
55 55 pub(super) entry: Option<DirstateEntry>,
56 56
57 57 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
58 58
59 59 pub(super) children: ChildNodes<'on_disk>,
60 60
61 61 /// How many (non-inclusive) descendants of this node are tracked files
62 62 pub(super) tracked_descendants_count: u32,
63 63 }
64 64
65 65 impl<'on_disk> Node<'on_disk> {
66 66 pub(super) fn state(&self) -> Option<EntryState> {
67 67 self.entry.as_ref().map(|entry| entry.state)
68 68 }
69 69
70 70 pub(super) fn sorted<'tree>(
71 nodes: &'tree mut ChildNodes<'on_disk>,
72 ) -> Vec<(&'tree NodeKey<'on_disk>, &'tree mut Self)> {
73 let mut vec: Vec<_> = nodes.iter_mut().collect();
71 nodes: &'tree ChildNodes<'on_disk>,
72 ) -> Vec<(&'tree NodeKey<'on_disk>, &'tree Self)> {
73 let mut vec: Vec<_> = nodes.iter().collect();
74 74 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
75 75 // https://github.com/rust-lang/rust/issues/34162
76 76 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
77 77 vec
78 78 }
79 79 }
80 80
81 81 impl<'on_disk> DirstateMap<'on_disk> {
82 82 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
83 83 Self {
84 84 on_disk,
85 85 root: ChildNodes::default(),
86 86 nodes_with_entry_count: 0,
87 87 nodes_with_copy_source_count: 0,
88 88 }
89 89 }
90 90
91 91 #[timed]
92 92 pub fn new_v2(
93 93 on_disk: &'on_disk [u8],
94 94 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
95 95 on_disk::read(on_disk)
96 96 }
97 97
98 98 #[timed]
99 99 pub fn new_v1(
100 100 on_disk: &'on_disk [u8],
101 101 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
102 102 let mut map = Self::empty(on_disk);
103 103 if map.on_disk.is_empty() {
104 104 return Ok((map, None));
105 105 }
106 106
107 107 let parents = parse_dirstate_entries(
108 108 map.on_disk,
109 109 |path, entry, copy_source| {
110 110 let tracked = entry.state.is_tracked();
111 111 let node = Self::get_or_insert_node(
112 112 &mut map.root,
113 113 path,
114 114 WithBasename::to_cow_borrowed,
115 115 |ancestor| {
116 116 if tracked {
117 117 ancestor.tracked_descendants_count += 1
118 118 }
119 119 },
120 120 );
121 121 assert!(
122 122 node.entry.is_none(),
123 123 "duplicate dirstate entry in read"
124 124 );
125 125 assert!(
126 126 node.copy_source.is_none(),
127 127 "duplicate dirstate entry in read"
128 128 );
129 129 node.entry = Some(*entry);
130 130 node.copy_source = copy_source.map(Cow::Borrowed);
131 131 map.nodes_with_entry_count += 1;
132 132 if copy_source.is_some() {
133 133 map.nodes_with_copy_source_count += 1
134 134 }
135 135 },
136 136 )?;
137 137 let parents = Some(parents.clone());
138 138
139 139 Ok((map, parents))
140 140 }
141 141
142 142 fn get_node(&self, path: &HgPath) -> Option<&Node> {
143 143 let mut children = &self.root;
144 144 let mut components = path.components();
145 145 let mut component =
146 146 components.next().expect("expected at least one component");
147 147 loop {
148 148 let child = children.get(component)?;
149 149 if let Some(next_component) = components.next() {
150 150 component = next_component;
151 151 children = &child.children;
152 152 } else {
153 153 return Some(child);
154 154 }
155 155 }
156 156 }
157 157
158 158 /// Returns a mutable reference to the node at `path` if it exists
159 159 ///
160 160 /// This takes `root` instead of `&mut self` so that callers can mutate
161 161 /// other fields while the returned borrow is still valid
162 162 fn get_node_mut<'tree>(
163 163 root: &'tree mut ChildNodes<'on_disk>,
164 164 path: &HgPath,
165 165 ) -> Option<&'tree mut Node<'on_disk>> {
166 166 let mut children = root;
167 167 let mut components = path.components();
168 168 let mut component =
169 169 components.next().expect("expected at least one component");
170 170 loop {
171 171 let child = children.get_mut(component)?;
172 172 if let Some(next_component) = components.next() {
173 173 component = next_component;
174 174 children = &mut child.children;
175 175 } else {
176 176 return Some(child);
177 177 }
178 178 }
179 179 }
180 180
181 181 fn get_or_insert_node<'tree, 'path>(
182 182 root: &'tree mut ChildNodes<'on_disk>,
183 183 path: &'path HgPath,
184 184 to_cow: impl Fn(
185 185 WithBasename<&'path HgPath>,
186 186 ) -> WithBasename<Cow<'on_disk, HgPath>>,
187 187 mut each_ancestor: impl FnMut(&mut Node),
188 188 ) -> &'tree mut Node<'on_disk> {
189 189 let mut child_nodes = root;
190 190 let mut inclusive_ancestor_paths =
191 191 WithBasename::inclusive_ancestors_of(path);
192 192 let mut ancestor_path = inclusive_ancestor_paths
193 193 .next()
194 194 .expect("expected at least one inclusive ancestor");
195 195 loop {
196 196 // TODO: can we avoid allocating an owned key in cases where the
197 197 // map already contains that key, without introducing double
198 198 // lookup?
199 199 let child_node =
200 200 child_nodes.entry(to_cow(ancestor_path)).or_default();
201 201 if let Some(next) = inclusive_ancestor_paths.next() {
202 202 each_ancestor(child_node);
203 203 ancestor_path = next;
204 204 child_nodes = &mut child_node.children;
205 205 } else {
206 206 return child_node;
207 207 }
208 208 }
209 209 }
210 210
211 211 fn add_or_remove_file(
212 212 &mut self,
213 213 path: &HgPath,
214 214 old_state: EntryState,
215 215 new_entry: DirstateEntry,
216 216 ) {
217 217 let tracked_count_increment =
218 218 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
219 219 (false, true) => 1,
220 220 (true, false) => -1,
221 221 _ => 0,
222 222 };
223 223
224 224 let node = Self::get_or_insert_node(
225 225 &mut self.root,
226 226 path,
227 227 WithBasename::to_cow_owned,
228 228 |ancestor| {
229 229 // We can’t use `+= increment` because the counter is unsigned,
230 230 // and we want debug builds to detect accidental underflow
231 231 // through zero
232 232 match tracked_count_increment {
233 233 1 => ancestor.tracked_descendants_count += 1,
234 234 -1 => ancestor.tracked_descendants_count -= 1,
235 235 _ => {}
236 236 }
237 237 },
238 238 );
239 239 if node.entry.is_none() {
240 240 self.nodes_with_entry_count += 1
241 241 }
242 242 node.entry = Some(new_entry)
243 243 }
244 244
245 245 fn iter_nodes<'a>(
246 246 &'a self,
247 247 ) -> impl Iterator<Item = (&'a Cow<'on_disk, HgPath>, &'a Node)> + 'a {
248 248 // Depth first tree traversal.
249 249 //
250 250 // If we could afford internal iteration and recursion,
251 251 // this would look like:
252 252 //
253 253 // ```
254 254 // fn traverse_children(
255 255 // children: &ChildNodes,
256 256 // each: &mut impl FnMut(&Node),
257 257 // ) {
258 258 // for child in children.values() {
259 259 // traverse_children(&child.children, each);
260 260 // each(child);
261 261 // }
262 262 // }
263 263 // ```
264 264 //
265 265 // However we want an external iterator and therefore can’t use the
266 266 // call stack. Use an explicit stack instead:
267 267 let mut stack = Vec::new();
268 268 let mut iter = self.root.iter();
269 269 std::iter::from_fn(move || {
270 270 while let Some((key, child_node)) = iter.next() {
271 271 // Pseudo-recursion
272 272 let new_iter = child_node.children.iter();
273 273 let old_iter = std::mem::replace(&mut iter, new_iter);
274 274 let key = key.full_path();
275 275 stack.push((key, child_node, old_iter));
276 276 }
277 277 // Found the end of a `children.iter()` iterator.
278 278 if let Some((key, child_node, next_iter)) = stack.pop() {
279 279 // "Return" from pseudo-recursion by restoring state from the
280 280 // explicit stack
281 281 iter = next_iter;
282 282
283 283 Some((key, child_node))
284 284 } else {
285 285 // Reached the bottom of the stack, we’re done
286 286 None
287 287 }
288 288 })
289 289 }
290 290
291 291 fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) {
292 292 for path in paths {
293 293 if let Some(node) =
294 294 Self::get_node_mut(&mut self.root, path.as_ref())
295 295 {
296 296 if let Some(entry) = node.entry.as_mut() {
297 297 entry.clear_mtime();
298 298 }
299 299 }
300 300 }
301 301 }
302 302 }
303 303
304 304 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
305 305 fn clear(&mut self) {
306 306 self.root.clear();
307 307 self.nodes_with_entry_count = 0;
308 308 self.nodes_with_copy_source_count = 0;
309 309 }
310 310
311 311 fn add_file(
312 312 &mut self,
313 313 filename: &HgPath,
314 314 old_state: EntryState,
315 315 entry: DirstateEntry,
316 316 ) -> Result<(), DirstateMapError> {
317 317 self.add_or_remove_file(filename, old_state, entry);
318 318 Ok(())
319 319 }
320 320
321 321 fn remove_file(
322 322 &mut self,
323 323 filename: &HgPath,
324 324 old_state: EntryState,
325 325 size: i32,
326 326 ) -> Result<(), DirstateMapError> {
327 327 let entry = DirstateEntry {
328 328 state: EntryState::Removed,
329 329 mode: 0,
330 330 size,
331 331 mtime: 0,
332 332 };
333 333 self.add_or_remove_file(filename, old_state, entry);
334 334 Ok(())
335 335 }
336 336
337 337 fn drop_file(
338 338 &mut self,
339 339 filename: &HgPath,
340 340 old_state: EntryState,
341 341 ) -> Result<bool, DirstateMapError> {
342 342 struct Dropped {
343 343 was_tracked: bool,
344 344 had_entry: bool,
345 345 had_copy_source: bool,
346 346 }
347 347 fn recur(nodes: &mut ChildNodes, path: &HgPath) -> Option<Dropped> {
348 348 let (first_path_component, rest_of_path) =
349 349 path.split_first_component();
350 350 let node = nodes.get_mut(first_path_component)?;
351 351 let dropped;
352 352 if let Some(rest) = rest_of_path {
353 353 dropped = recur(&mut node.children, rest)?;
354 354 if dropped.was_tracked {
355 355 node.tracked_descendants_count -= 1;
356 356 }
357 357 } else {
358 358 dropped = Dropped {
359 359 was_tracked: node
360 360 .entry
361 361 .as_ref()
362 362 .map_or(false, |entry| entry.state.is_tracked()),
363 363 had_entry: node.entry.take().is_some(),
364 364 had_copy_source: node.copy_source.take().is_some(),
365 365 };
366 366 }
367 367 // After recursion, for both leaf (rest_of_path is None) nodes and
368 368 // parent nodes, remove a node if it just became empty.
369 369 if node.entry.is_none()
370 370 && node.copy_source.is_none()
371 371 && node.children.is_empty()
372 372 {
373 373 nodes.remove(first_path_component);
374 374 }
375 375 Some(dropped)
376 376 }
377 377
378 378 if let Some(dropped) = recur(&mut self.root, filename) {
379 379 if dropped.had_entry {
380 380 self.nodes_with_entry_count -= 1
381 381 }
382 382 if dropped.had_copy_source {
383 383 self.nodes_with_copy_source_count -= 1
384 384 }
385 385 Ok(dropped.had_entry)
386 386 } else {
387 387 debug_assert!(!old_state.is_tracked());
388 388 Ok(false)
389 389 }
390 390 }
391 391
392 392 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
393 393 for filename in filenames {
394 394 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
395 395 if let Some(entry) = node.entry.as_mut() {
396 396 entry.clear_ambiguous_mtime(now);
397 397 }
398 398 }
399 399 }
400 400 }
401 401
402 402 fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool {
403 403 self.get_node(key)
404 404 .and_then(|node| node.entry.as_ref())
405 405 .map_or(false, DirstateEntry::is_non_normal)
406 406 }
407 407
408 408 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
409 409 // Do nothing, this `DirstateMap` does not have a separate "non normal
410 410 // entries" set that needs to be kept up to date
411 411 }
412 412
413 413 fn non_normal_or_other_parent_paths(
414 414 &mut self,
415 415 ) -> Box<dyn Iterator<Item = &HgPath> + '_> {
416 416 Box::new(self.iter_nodes().filter_map(|(path, node)| {
417 417 node.entry
418 418 .as_ref()
419 419 .filter(|entry| {
420 420 entry.is_non_normal() || entry.is_from_other_parent()
421 421 })
422 422 .map(|_| &**path)
423 423 }))
424 424 }
425 425
426 426 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
427 427 // Do nothing, this `DirstateMap` does not have a separate "non normal
428 428 // entries" and "from other parent" sets that need to be recomputed
429 429 }
430 430
431 431 fn iter_non_normal_paths(
432 432 &mut self,
433 433 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
434 434 self.iter_non_normal_paths_panic()
435 435 }
436 436
437 437 fn iter_non_normal_paths_panic(
438 438 &self,
439 439 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
440 440 Box::new(self.iter_nodes().filter_map(|(path, node)| {
441 441 node.entry
442 442 .as_ref()
443 443 .filter(|entry| entry.is_non_normal())
444 444 .map(|_| &**path)
445 445 }))
446 446 }
447 447
448 448 fn iter_other_parent_paths(
449 449 &mut self,
450 450 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
451 451 Box::new(self.iter_nodes().filter_map(|(path, node)| {
452 452 node.entry
453 453 .as_ref()
454 454 .filter(|entry| entry.is_from_other_parent())
455 455 .map(|_| &**path)
456 456 }))
457 457 }
458 458
459 459 fn has_tracked_dir(
460 460 &mut self,
461 461 directory: &HgPath,
462 462 ) -> Result<bool, DirstateMapError> {
463 463 if let Some(node) = self.get_node(directory) {
464 464 // A node without a `DirstateEntry` was created to hold child
465 465 // nodes, and is therefore a directory.
466 466 Ok(node.entry.is_none() && node.tracked_descendants_count > 0)
467 467 } else {
468 468 Ok(false)
469 469 }
470 470 }
471 471
472 472 fn has_dir(
473 473 &mut self,
474 474 directory: &HgPath,
475 475 ) -> Result<bool, DirstateMapError> {
476 476 if let Some(node) = self.get_node(directory) {
477 477 // A node without a `DirstateEntry` was created to hold child
478 478 // nodes, and is therefore a directory.
479 479 Ok(node.entry.is_none())
480 480 } else {
481 481 Ok(false)
482 482 }
483 483 }
484 484
485 485 #[timed]
486 486 fn pack_v1(
487 487 &mut self,
488 488 parents: DirstateParents,
489 489 now: Timestamp,
490 490 ) -> Result<Vec<u8>, DirstateError> {
491 491 let now: i32 = now.0.try_into().expect("time overflow");
492 492 let mut ambiguous_mtimes = Vec::new();
493 493 // Optimization (to be measured?): pre-compute size to avoid `Vec`
494 494 // reallocations
495 495 let mut size = parents.as_bytes().len();
496 496 for (path, node) in self.iter_nodes() {
497 497 if let Some(entry) = &node.entry {
498 498 size += packed_entry_size(
499 499 path,
500 500 node.copy_source.as_ref().map(|p| &**p),
501 501 );
502 502 if entry.mtime_is_ambiguous(now) {
503 503 ambiguous_mtimes.push(path.clone())
504 504 }
505 505 }
506 506 }
507 507 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes);
508 508
509 509 let mut packed = Vec::with_capacity(size);
510 510 packed.extend(parents.as_bytes());
511 511
512 512 for (path, node) in self.iter_nodes() {
513 513 if let Some(entry) = &node.entry {
514 514 pack_entry(
515 515 path,
516 516 entry,
517 517 node.copy_source.as_ref().map(|p| &**p),
518 518 &mut packed,
519 519 );
520 520 }
521 521 }
522 522 Ok(packed)
523 523 }
524 524
525 525 #[timed]
526 526 fn pack_v2(
527 527 &mut self,
528 528 parents: DirstateParents,
529 529 now: Timestamp,
530 530 ) -> Result<Vec<u8>, DirstateError> {
531 531 // TODO: how do we want to handle this in 2038?
532 532 let now: i32 = now.0.try_into().expect("time overflow");
533 533 let mut paths = Vec::new();
534 534 for (path, node) in self.iter_nodes() {
535 535 if let Some(entry) = &node.entry {
536 536 if entry.mtime_is_ambiguous(now) {
537 537 paths.push(path.clone())
538 538 }
539 539 }
540 540 }
541 541 // Borrow of `self` ends here since we collect cloned paths
542 542
543 543 self.clear_known_ambiguous_mtimes(&paths);
544 544
545 545 on_disk::write(self, parents)
546 546 }
547 547
548 548 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
549 549 // Do nothing, this `DirstateMap` does not have a separate `all_dirs` that
550 550 // needs to be recomputed
551 551 Ok(())
552 552 }
553 553
554 554 fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
555 555 // Do nothing, this `DirstateMap` does not have a separate `dirs` that needs
556 556 // to be recomputed
557 557 Ok(())
558 558 }
559 559
560 560 fn status<'a>(
561 561 &'a mut self,
562 562 matcher: &'a (dyn Matcher + Sync),
563 563 root_dir: PathBuf,
564 564 ignore_files: Vec<PathBuf>,
565 565 options: StatusOptions,
566 566 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
567 567 {
568 568 super::status::status(self, matcher, root_dir, ignore_files, options)
569 569 }
570 570
571 571 fn copy_map_len(&self) -> usize {
572 572 self.nodes_with_copy_source_count as usize
573 573 }
574 574
575 575 fn copy_map_iter(&self) -> CopyMapIter<'_> {
576 576 Box::new(self.iter_nodes().filter_map(|(path, node)| {
577 577 node.copy_source
578 578 .as_ref()
579 579 .map(|copy_source| (&**path, &**copy_source))
580 580 }))
581 581 }
582 582
583 583 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
584 584 if let Some(node) = self.get_node(key) {
585 585 node.copy_source.is_some()
586 586 } else {
587 587 false
588 588 }
589 589 }
590 590
591 591 fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> {
592 592 self.get_node(key)?.copy_source.as_ref().map(|p| &**p)
593 593 }
594 594
595 595 fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> {
596 596 let count = &mut self.nodes_with_copy_source_count;
597 597 Self::get_node_mut(&mut self.root, key).and_then(|node| {
598 598 if node.copy_source.is_some() {
599 599 *count -= 1
600 600 }
601 601 node.copy_source.take().map(Cow::into_owned)
602 602 })
603 603 }
604 604
605 605 fn copy_map_insert(
606 606 &mut self,
607 607 key: HgPathBuf,
608 608 value: HgPathBuf,
609 609 ) -> Option<HgPathBuf> {
610 610 let node = Self::get_or_insert_node(
611 611 &mut self.root,
612 612 &key,
613 613 WithBasename::to_cow_owned,
614 614 |_ancestor| {},
615 615 );
616 616 if node.copy_source.is_none() {
617 617 self.nodes_with_copy_source_count += 1
618 618 }
619 619 node.copy_source.replace(value.into()).map(Cow::into_owned)
620 620 }
621 621
622 622 fn len(&self) -> usize {
623 623 self.nodes_with_entry_count as usize
624 624 }
625 625
626 626 fn contains_key(&self, key: &HgPath) -> bool {
627 627 self.get(key).is_some()
628 628 }
629 629
630 630 fn get(&self, key: &HgPath) -> Option<&DirstateEntry> {
631 631 self.get_node(key)?.entry.as_ref()
632 632 }
633 633
634 634 fn iter(&self) -> StateMapIter<'_> {
635 635 Box::new(self.iter_nodes().filter_map(|(path, node)| {
636 636 node.entry.as_ref().map(|entry| (&**path, entry))
637 637 }))
638 638 }
639 639 }
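`iter_nodes` above implements the depth-first traversal sketched in its own comment without recursion, keeping the "call stack" in an explicit `Vec` so it can be exposed as an ordinary external iterator. A self-contained sketch of the same pseudo-recursion pattern over a simplified, hypothetical tree type:

```rust
use std::collections::HashMap;

/// Hypothetical simplified node: the real `Node` also carries an entry,
/// a copy source, and counters.
#[derive(Default)]
struct Node {
    children: HashMap<String, Node>,
}

/// External depth-first iterator built with an explicit stack instead of
/// the call stack, mirroring the "pseudo-recursion" comments above.
fn iter_nodes<'a>(
    root: &'a HashMap<String, Node>,
) -> impl Iterator<Item = &'a String> + 'a {
    let mut stack = Vec::new();
    let mut iter = root.iter();
    std::iter::from_fn(move || {
        // Descend as deep as possible, remembering where to resume.
        while let Some((key, child)) = iter.next() {
            let parent_iter = std::mem::replace(&mut iter, child.children.iter());
            stack.push((key, parent_iter));
        }
        // Current level exhausted: "return" from pseudo-recursion by
        // restoring the parent's iterator from the explicit stack.
        let (key, parent_iter) = stack.pop()?;
        iter = parent_iter;
        Some(key)
    })
}

fn main() {
    let mut root = HashMap::new();
    let mut dir = Node::default();
    dir.children.insert("file".to_owned(), Node::default());
    root.insert("dir".to_owned(), dir);
    // Children are yielded before their parent, as in the real iterator.
    let keys: Vec<_> = iter_nodes(&root).collect();
    assert_eq!(keys, ["file", "dir"]);
}
```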
@@ -1,326 +1,326 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! # File format
4 4 //!
5 5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 7 //! the nodes representing the files and directories at the root of the
8 8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 10 //! nodes (if any) for nested files and directories.
11 11
12 12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap};
13 13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 14 use crate::errors::HgError;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateParents;
19 19 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
20 20 use bytes_cast::BytesCast;
21 21 use std::borrow::Cow;
22 22 use std::convert::{TryFrom, TryInto};
23 23
24 24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 25 /// This is a redundant sanity check more than an actual "magic number" since
26 26 /// `.hg/requires` already governs which format should be used.
27 27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28 28
29 29 #[derive(BytesCast)]
30 30 #[repr(C)]
31 31 struct Header {
32 32 marker: [u8; V2_FORMAT_MARKER.len()],
33 33
34 34 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
35 35 /// `parents` field being at this offset, immediately after `marker`.
36 36 parents: DirstateParents,
37 37
38 38 root: ChildNodes,
39 39 nodes_with_entry_count: Size,
40 40 nodes_with_copy_source_count: Size,
41 41 }
42 42
43 43 #[derive(BytesCast)]
44 44 #[repr(C)]
45 45 struct Node {
46 46 full_path: PathSlice,
47 47
48 48 /// In bytes from `self.full_path.start`
49 49 base_name_start: Size,
50 50
51 51 copy_source: OptPathSlice,
52 52 entry: OptEntry,
53 53 children: ChildNodes,
54 54 tracked_descendants_count: Size,
55 55 }
56 56
57 57 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
58 58 /// format
59 59 #[derive(BytesCast)]
60 60 #[repr(C)]
61 61 struct OptEntry {
62 62 state: u8,
63 63 mode: I32Be,
64 64 mtime: I32Be,
65 65 size: I32Be,
66 66 }
67 67
68 68 /// Counted in bytes from the start of the file
69 69 ///
70 70 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
71 71 /// we could save space by using `U32Be` instead.
72 72 type Offset = U64Be;
73 73
74 74 /// Counted in number of items
75 75 ///
76 76 /// NOTE: not supporting directories with more than 4 billion direct children,
77 77 /// or filenames more than 4 GiB.
78 78 type Size = U32Be;
79 79
80 80 /// Location of consecutive, fixed-size items.
81 81 ///
82 82 /// An item can be a single byte for paths, or a struct with
83 83 /// `derive(BytesCast)`.
84 84 #[derive(BytesCast, Copy, Clone)]
85 85 #[repr(C)]
86 86 struct Slice {
87 87 start: Offset,
88 88 len: Size,
89 89 }
90 90
91 91 /// A contiguous sequence of `len` times `Node`, representing the child nodes
92 92 /// of either some other node or of the repository root.
93 93 ///
94 94 /// Always sorted by ascending `full_path`, to allow binary search.
95 95 /// Since nodes with the same parent nodes also have the same parent path,
96 96 /// only the `base_name`s need to be compared during binary search.
97 97 type ChildNodes = Slice;
98 98
99 99 /// A `HgPath` of `len` bytes
100 100 type PathSlice = Slice;
101 101
102 102 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
103 103 type OptPathSlice = Slice;
104 104
105 105 /// Make sure that size-affecting changes are made knowingly
106 106 fn _static_assert_size_of() {
107 107 let _ = std::mem::transmute::<Header, [u8; 72]>;
108 108 let _ = std::mem::transmute::<Node, [u8; 57]>;
109 109 }
110 110
111 111 pub(super) fn read<'on_disk>(
112 112 on_disk: &'on_disk [u8],
113 113 ) -> Result<(DirstateMap<'on_disk>, Option<DirstateParents>), DirstateError> {
114 114 if on_disk.is_empty() {
115 115 return Ok((DirstateMap::empty(on_disk), None));
116 116 }
117 117 let (header, _) = Header::from_bytes(on_disk)
118 118 .map_err(|_| HgError::corrupted("truncated dirstate-v2"))?;
119 119 let Header {
120 120 marker,
121 121 parents,
122 122 root,
123 123 nodes_with_entry_count,
124 124 nodes_with_copy_source_count,
125 125 } = header;
126 126 if marker != V2_FORMAT_MARKER {
127 127 return Err(HgError::corrupted("missing dirstate-v2 marker").into());
128 128 }
129 129 let dirstate_map = DirstateMap {
130 130 on_disk,
131 131 root: read_nodes(on_disk, *root)?,
132 132 nodes_with_entry_count: nodes_with_entry_count.get(),
133 133 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
134 134 };
135 135 let parents = Some(parents.clone());
136 136 Ok((dirstate_map, parents))
137 137 }
138 138
139 139 impl Node {
140 140 pub(super) fn path<'on_disk>(
141 141 &self,
142 142 on_disk: &'on_disk [u8],
143 143 ) -> Result<dirstate_map::NodeKey<'on_disk>, HgError> {
144 144 let full_path = read_hg_path(on_disk, self.full_path)?;
145 145 let base_name_start = usize::try_from(self.base_name_start.get())
146 146 // u32 -> usize, could only panic on a 16-bit CPU
147 147 .expect("dirstate-v2 base_name_start out of bounds");
148 148 if base_name_start < full_path.len() {
149 149 Ok(WithBasename::from_raw_parts(full_path, base_name_start))
150 150 } else {
151 151 Err(HgError::corrupted(
152 152 "dirstate-v2 base_name_start out of bounds",
153 153 ))
154 154 }
155 155 }
156 156
157 157 pub(super) fn copy_source<'on_disk>(
158 158 &self,
159 159 on_disk: &'on_disk [u8],
160 160 ) -> Result<Option<Cow<'on_disk, HgPath>>, HgError> {
161 161 Ok(if self.copy_source.start.get() != 0 {
162 162 Some(read_hg_path(on_disk, self.copy_source)?)
163 163 } else {
164 164 None
165 165 })
166 166 }
167 167
168 168 pub(super) fn entry(&self) -> Result<Option<DirstateEntry>, HgError> {
169 169 Ok(if self.entry.state != b'\0' {
170 170 Some(DirstateEntry {
171 171 state: self.entry.state.try_into()?,
172 172 mode: self.entry.mode.get(),
173 173 mtime: self.entry.mtime.get(),
174 174 size: self.entry.size.get(),
175 175 })
176 176 } else {
177 177 None
178 178 })
179 179 }
180 180
181 181 pub(super) fn to_in_memory_node<'on_disk>(
182 182 &self,
183 183 on_disk: &'on_disk [u8],
184 184 ) -> Result<dirstate_map::Node<'on_disk>, HgError> {
185 185 Ok(dirstate_map::Node {
186 186 children: read_nodes(on_disk, self.children)?,
187 187 copy_source: self.copy_source(on_disk)?,
188 188 entry: self.entry()?,
189 189 tracked_descendants_count: self.tracked_descendants_count.get(),
190 190 })
191 191 }
192 192 }
193 193
194 194 fn read_nodes(
195 195 on_disk: &[u8],
196 196 slice: ChildNodes,
197 197 ) -> Result<dirstate_map::ChildNodes, HgError> {
198 198 read_slice::<Node>(on_disk, slice)?
199 199 .iter()
200 200 .map(|node| {
201 201 Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?))
202 202 })
203 203 .collect()
204 204 }
205 205
206 206 fn read_hg_path(on_disk: &[u8], slice: Slice) -> Result<Cow<HgPath>, HgError> {
207 207 let bytes = read_slice::<u8>(on_disk, slice)?;
208 208 Ok(Cow::Borrowed(HgPath::new(bytes)))
209 209 }
210 210
211 211 fn read_slice<T>(on_disk: &[u8], slice: Slice) -> Result<&[T], HgError>
212 212 where
213 213 T: BytesCast,
214 214 {
215 215 // Either value being `usize::MAX` would result in an "out of bounds" error,
216 216 // since a single `&[u8]` cannot occupy the entire address space.
217 217 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
218 218 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
219 219 on_disk
220 220 .get(start..)
221 221 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
222 222 .map(|(slice, _rest)| slice)
223 223 .ok_or_else(|| {
224 224 HgError::corrupted("dirstate v2 slice is out of bounds")
225 225 })
226 226 }
227 227
228 228 pub(super) fn write(
229 229 dirstate_map: &mut DirstateMap,
230 230 parents: DirstateParents,
231 231 ) -> Result<Vec<u8>, DirstateError> {
232 232 let header_len = std::mem::size_of::<Header>();
233 233
234 234 // This ignores the space for paths, and for nodes without an entry.
235 235 // TODO: better estimate? Skip the `Vec` and write to a file directly?
236 236 let size_guess = header_len
237 237 + std::mem::size_of::<Node>()
238 238 * dirstate_map.nodes_with_entry_count as usize;
239 239 let mut out = Vec::with_capacity(size_guess);
240 240
241 241 // Keep space for the header. We’ll fill it out at the end when we know the
242 242 // actual offset for the root nodes.
243 243 out.resize(header_len, 0_u8);
244 244
245 245 let root = write_nodes(&mut dirstate_map.root, &mut out)?;
246 246
247 247 let header = Header {
248 248 marker: *V2_FORMAT_MARKER,
249 249 parents: parents,
250 250 root,
251 251 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
252 252 nodes_with_copy_source_count: dirstate_map
253 253 .nodes_with_copy_source_count
254 254 .into(),
255 255 };
256 256 out[..header_len].copy_from_slice(header.as_bytes());
257 257 Ok(out)
258 258 }
259 259
260 260 fn write_nodes(
261 nodes: &mut dirstate_map::ChildNodes,
261 nodes: &dirstate_map::ChildNodes,
262 262 out: &mut Vec<u8>,
263 263 ) -> Result<ChildNodes, DirstateError> {
264 264 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
265 265 // order. Sort to enable binary search in the written file.
266 266 let nodes = dirstate_map::Node::sorted(nodes);
267 267
268 268 // First accumulate serialized nodes in a `Vec`
269 269 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
270 270 for (full_path, node) in nodes {
271 271 on_disk_nodes.push(Node {
272 children: write_nodes(&mut node.children, out)?,
272 children: write_nodes(&node.children, out)?,
273 273 tracked_descendants_count: node.tracked_descendants_count.into(),
274 274 full_path: write_slice::<u8>(
275 275 full_path.full_path().as_bytes(),
276 276 out,
277 277 ),
278 278 base_name_start: u32::try_from(full_path.base_name_start())
279 279 // Could only panic for paths over 4 GiB
280 280 .expect("dirstate-v2 offset overflow")
281 281 .into(),
282 282 copy_source: if let Some(source) = &node.copy_source {
283 283 write_slice::<u8>(source.as_bytes(), out)
284 284 } else {
285 285 Slice {
286 286 start: 0.into(),
287 287 len: 0.into(),
288 288 }
289 289 },
290 entry: if let Some(entry) = &mut node.entry {
290 entry: if let Some(entry) = &node.entry {
291 291 OptEntry {
292 292 state: entry.state.into(),
293 293 mode: entry.mode.into(),
294 294 mtime: entry.mtime.into(),
295 295 size: entry.size.into(),
296 296 }
297 297 } else {
298 298 OptEntry {
299 299 state: b'\0',
300 300 mode: 0.into(),
301 301 mtime: 0.into(),
302 302 size: 0.into(),
303 303 }
304 304 },
305 305 })
306 306 }
307 307 // … so we can write them contiguously
308 308 Ok(write_slice::<Node>(&on_disk_nodes, out))
309 309 }
310 310
311 311 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
312 312 where
313 313 T: BytesCast,
314 314 {
315 315 let start = u64::try_from(out.len())
316 316 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
317 317 .expect("dirstate-v2 offset overflow")
318 318 .into();
319 319 let len = u32::try_from(slice.len())
320 320 // Could only panic for paths over 4 GiB or nodes with over 4 billion
321 321 // child nodes
322 322 .expect("dirstate-v2 offset overflow")
323 323 .into();
324 324 out.extend(slice.as_bytes());
325 325 Slice { start, len }
326 326 }
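The format above never stores variable-sized data inline: paths and child-node arrays are appended to a single growing buffer, and each fixed-size record keeps only a `(start, len)` slice pointing into it. A minimal sketch of that round-trip, using plain `u64`/`u32` fields in place of the real format's big-endian `U64Be`/`U32Be` wrappers and leaving out `BytesCast`:

```rust
use std::convert::TryFrom;

/// Simplified stand-in for the on-disk `Slice` record.
#[derive(Copy, Clone, Debug)]
struct Slice {
    start: u64, // byte offset from the start of the buffer/file
    len: u32,   // length in bytes
}

/// Append `bytes` to the output buffer and return where they landed.
fn write_slice(bytes: &[u8], out: &mut Vec<u8>) -> Slice {
    let start = out.len() as u64;
    out.extend_from_slice(bytes);
    Slice {
        start,
        len: u32::try_from(bytes.len()).expect("slice too long"),
    }
}

/// Resolve a `Slice` back into a borrowed byte range, failing gracefully
/// on out-of-bounds values instead of panicking on corrupt input.
fn read_slice(on_disk: &[u8], slice: Slice) -> Option<&[u8]> {
    let start = usize::try_from(slice.start).ok()?;
    let len = usize::try_from(slice.len).ok()?;
    on_disk.get(start..)?.get(..len)
}

fn main() {
    let mut out = Vec::new();
    let path = write_slice(b"dir/file", &mut out);
    let copy_source = write_slice(b"dir/old-name", &mut out);
    // A real writer would pack `path` and `copy_source` into a `Node`
    // record; here we only show the offset/length round-trip.
    assert_eq!(read_slice(&out, path), Some(&b"dir/file"[..]));
    assert_eq!(read_slice(&out, copy_source), Some(&b"dir/old-name"[..]));
    assert_eq!(read_slice(&out, Slice { start: 999, len: 1 }), None);
}
```

Writing child nodes before their parent record, as `write_nodes` above does, means every `Slice` can be computed before the record that contains it is serialized.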
@@ -1,426 +1,426 b''
1 1 use crate::dirstate::status::IgnoreFnType;
2 2 use crate::dirstate_tree::dirstate_map::ChildNodes;
3 3 use crate::dirstate_tree::dirstate_map::DirstateMap;
4 4 use crate::dirstate_tree::dirstate_map::Node;
5 5 use crate::matchers::get_ignore_function;
6 6 use crate::matchers::Matcher;
7 7 use crate::utils::files::get_bytes_from_os_string;
8 8 use crate::utils::hg_path::HgPath;
9 9 use crate::BadMatch;
10 10 use crate::DirstateStatus;
11 11 use crate::EntryState;
12 12 use crate::HgPathBuf;
13 13 use crate::PatternFileWarning;
14 14 use crate::StatusError;
15 15 use crate::StatusOptions;
16 16 use micro_timer::timed;
17 17 use rayon::prelude::*;
18 18 use std::borrow::Cow;
19 19 use std::io;
20 20 use std::path::Path;
21 21 use std::path::PathBuf;
22 22 use std::sync::Mutex;
23 23
24 24 /// Returns the status of the working directory compared to its parent
25 25 /// changeset.
26 26 ///
27 27 /// This algorithm is based on traversing the filesystem tree (`fs` in function
28 28 /// and variable names) and dirstate tree at the same time. The core of this
29 29 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
30 30 /// and its use of `itertools::merge_join_by`. When reaching a path that only
31 31 /// exists in one of the two trees, depending on information requested by
32 32 /// `options` we may need to traverse the remaining subtree.
33 33 #[timed]
34 34 pub fn status<'tree>(
35 35 dmap: &'tree mut DirstateMap,
36 36 matcher: &(dyn Matcher + Sync),
37 37 root_dir: PathBuf,
38 38 ignore_files: Vec<PathBuf>,
39 39 options: StatusOptions,
40 40 ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
41 41 let (ignore_fn, warnings): (IgnoreFnType, _) =
42 42 if options.list_ignored || options.list_unknown {
43 43 get_ignore_function(ignore_files, &root_dir)?
44 44 } else {
45 45 (Box::new(|&_| true), vec![])
46 46 };
47 47
48 48 let common = StatusCommon {
49 49 options,
50 50 matcher,
51 51 ignore_fn,
52 52 outcome: Mutex::new(DirstateStatus::default()),
53 53 };
54 54 let is_at_repo_root = true;
55 55 let hg_path = HgPath::new("");
56 56 let has_ignored_ancestor = false;
57 57 common.traverse_fs_directory_and_dirstate(
58 58 has_ignored_ancestor,
59 &mut dmap.root,
59 &dmap.root,
60 60 hg_path,
61 61 &root_dir,
62 62 is_at_repo_root,
63 63 );
64 64 Ok((common.outcome.into_inner().unwrap(), warnings))
65 65 }
66 66
67 67 /// Bag of random things needed by various parts of the algorithm. Reduces the
68 68 /// number of parameters passed to functions.
69 69 struct StatusCommon<'tree, 'a> {
70 70 options: StatusOptions,
71 71 matcher: &'a (dyn Matcher + Sync),
72 72 ignore_fn: IgnoreFnType<'a>,
73 73 outcome: Mutex<DirstateStatus<'tree>>,
74 74 }
75 75
76 76 impl<'tree, 'a> StatusCommon<'tree, 'a> {
77 77 fn read_dir(
78 78 &self,
79 79 hg_path: &HgPath,
80 80 fs_path: &Path,
81 81 is_at_repo_root: bool,
82 82 ) -> Result<Vec<DirEntry>, ()> {
83 83 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
84 84 let errno = error.raw_os_error().expect("expected real OS error");
85 85 self.outcome
86 86 .lock()
87 87 .unwrap()
88 88 .bad
89 89 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
90 90 })
91 91 }
92 92
93 93 fn traverse_fs_directory_and_dirstate(
94 94 &self,
95 95 has_ignored_ancestor: bool,
96 dirstate_nodes: &'tree mut ChildNodes,
96 dirstate_nodes: &'tree ChildNodes,
97 97 directory_hg_path: &'tree HgPath,
98 98 directory_fs_path: &Path,
99 99 is_at_repo_root: bool,
100 100 ) {
101 101 let mut fs_entries = if let Ok(entries) = self.read_dir(
102 102 directory_hg_path,
103 103 directory_fs_path,
104 104 is_at_repo_root,
105 105 ) {
106 106 entries
107 107 } else {
108 108 return;
109 109 };
110 110
111 111 // `merge_join_by` requires both its input iterators to be sorted:
112 112
113 113 let dirstate_nodes = Node::sorted(dirstate_nodes);
114 114 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
115 115 // https://github.com/rust-lang/rust/issues/34162
116 116 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
117 117
118 118 itertools::merge_join_by(
119 119 dirstate_nodes,
120 120 &fs_entries,
121 121 |(full_path, _node), fs_entry| {
122 122 full_path.base_name().cmp(&fs_entry.base_name)
123 123 },
124 124 )
125 125 .par_bridge()
126 126 .for_each(|pair| {
127 127 use itertools::EitherOrBoth::*;
128 128 match pair {
129 129 Both((hg_path, dirstate_node), fs_entry) => {
130 130 self.traverse_fs_and_dirstate(
131 131 fs_entry,
132 132 hg_path.full_path(),
133 133 dirstate_node,
134 134 has_ignored_ancestor,
135 135 );
136 136 }
137 137 Left((hg_path, dirstate_node)) => self.traverse_dirstate_only(
138 138 hg_path.full_path(),
139 139 dirstate_node,
140 140 ),
141 141 Right(fs_entry) => self.traverse_fs_only(
142 142 has_ignored_ancestor,
143 143 directory_hg_path,
144 144 fs_entry,
145 145 ),
146 146 }
147 147 })
148 148 }
149 149
150 150 fn traverse_fs_and_dirstate(
151 151 &self,
152 152 fs_entry: &DirEntry,
153 153 hg_path: &'tree HgPath,
154 dirstate_node: &'tree mut Node,
154 dirstate_node: &'tree Node,
155 155 has_ignored_ancestor: bool,
156 156 ) {
157 157 let file_type = fs_entry.metadata.file_type();
158 158 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
159 159 if !file_or_symlink {
160 160 // If we previously had a file here, it was removed (with
161 161 // `hg rm` or similar) or deleted before it could be
162 162 // replaced by a directory or something else.
163 163 self.mark_removed_or_deleted_if_file(
164 164 hg_path,
165 165 dirstate_node.state(),
166 166 );
167 167 }
168 168 if file_type.is_dir() {
169 169 if self.options.collect_traversed_dirs {
170 170 self.outcome.lock().unwrap().traversed.push(hg_path.into())
171 171 }
172 172 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
173 173 let is_at_repo_root = false;
174 174 self.traverse_fs_directory_and_dirstate(
175 175 is_ignored,
176 &mut dirstate_node.children,
176 &dirstate_node.children,
177 177 hg_path,
178 178 &fs_entry.full_path,
179 179 is_at_repo_root,
180 180 );
181 181 } else {
182 182 if file_or_symlink && self.matcher.matches(hg_path) {
183 183 let full_path = Cow::from(hg_path);
184 184 if let Some(entry) = &dirstate_node.entry {
185 185 match entry.state {
186 186 EntryState::Added => {
187 187 self.outcome.lock().unwrap().added.push(full_path)
188 188 }
189 189 EntryState::Removed => self
190 190 .outcome
191 191 .lock()
192 192 .unwrap()
193 193 .removed
194 194 .push(full_path),
195 195 EntryState::Merged => self
196 196 .outcome
197 197 .lock()
198 198 .unwrap()
199 199 .modified
200 200 .push(full_path),
201 201 EntryState::Normal => {
202 202 self.handle_normal_file(
203 203 full_path,
204 204 dirstate_node,
205 205 entry,
206 206 fs_entry,
207 207 );
208 208 }
209 209 // This variant is not used in DirstateMap
210 210 // nodes
211 211 EntryState::Unknown => unreachable!(),
212 212 }
213 213 } else {
214 214 // `node.entry.is_none()` indicates a "directory"
215 215 // node, but the filesystem has a file
216 216 self.mark_unknown_or_ignored(
217 217 has_ignored_ancestor,
218 218 full_path,
219 219 )
220 220 }
221 221 }
222 222
223 for (child_hg_path, child_node) in &mut dirstate_node.children {
223 for (child_hg_path, child_node) in &dirstate_node.children {
224 224 self.traverse_dirstate_only(
225 225 child_hg_path.full_path(),
226 226 child_node,
227 227 )
228 228 }
229 229 }
230 230 }
231 231
232 232 /// A file with `EntryState::Normal` in the dirstate was found in the
233 233 /// filesystem
234 234 fn handle_normal_file(
235 235 &self,
236 236 full_path: Cow<'tree, HgPath>,
237 237 dirstate_node: &Node,
238 238 entry: &crate::DirstateEntry,
239 239 fs_entry: &DirEntry,
240 240 ) {
241 241 // Keep the low 31 bits
242 242 fn truncate_u64(value: u64) -> i32 {
243 243 (value & 0x7FFF_FFFF) as i32
244 244 }
245 245 fn truncate_i64(value: i64) -> i32 {
246 246 (value & 0x7FFF_FFFF) as i32
247 247 }
248 248
249 249 let mode_changed = || {
250 250 self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
251 251 };
252 252 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
253 253 if entry.size >= 0
254 254 && size_changed
255 255 && fs_entry.metadata.file_type().is_symlink()
256 256 {
257 257 // issue6456: Size returned may be longer due to encryption
258 258 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
259 259 self.outcome.lock().unwrap().unsure.push(full_path)
260 260 } else if dirstate_node.copy_source.is_some()
261 261 || entry.is_from_other_parent()
262 262 || (entry.size >= 0 && (size_changed || mode_changed()))
263 263 {
264 264 self.outcome.lock().unwrap().modified.push(full_path)
265 265 } else {
266 266 let mtime = mtime_seconds(&fs_entry.metadata);
267 267 if truncate_i64(mtime) != entry.mtime
268 268 || mtime == self.options.last_normal_time
269 269 {
270 270 self.outcome.lock().unwrap().unsure.push(full_path)
271 271 } else if self.options.list_clean {
272 272 self.outcome.lock().unwrap().clean.push(full_path)
273 273 }
274 274 }
275 275 }
276 276
277 277 /// A node in the dirstate tree has no corresponding filesystem entry
278 278 fn traverse_dirstate_only(
279 279 &self,
280 280 hg_path: &'tree HgPath,
281 dirstate_node: &'tree mut Node,
281 dirstate_node: &'tree Node,
282 282 ) {
283 283 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state());
284 dirstate_node.children.par_iter_mut().for_each(
284 dirstate_node.children.par_iter().for_each(
285 285 |(child_hg_path, child_node)| {
286 286 self.traverse_dirstate_only(
287 287 child_hg_path.full_path(),
288 288 child_node,
289 289 )
290 290 },
291 291 )
292 292 }
293 293
294 294 /// A node in the dirstate tree has no corresponding *file* on the
295 295 /// filesystem
296 296 ///
297 297 /// Does nothing on a "directory" node
298 298 fn mark_removed_or_deleted_if_file(
299 299 &self,
300 300 hg_path: &'tree HgPath,
301 301 dirstate_node_state: Option<EntryState>,
302 302 ) {
303 303 if let Some(state) = dirstate_node_state {
304 304 if self.matcher.matches(hg_path) {
305 305 if let EntryState::Removed = state {
306 306 self.outcome.lock().unwrap().removed.push(hg_path.into())
307 307 } else {
308 308 self.outcome.lock().unwrap().deleted.push(hg_path.into())
309 309 }
310 310 }
311 311 }
312 312 }
313 313
314 314 /// Something in the filesystem has no corresponding dirstate node
315 315 fn traverse_fs_only(
316 316 &self,
317 317 has_ignored_ancestor: bool,
318 318 directory_hg_path: &HgPath,
319 319 fs_entry: &DirEntry,
320 320 ) {
321 321 let hg_path = directory_hg_path.join(&fs_entry.base_name);
322 322 let file_type = fs_entry.metadata.file_type();
323 323 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
324 324 if file_type.is_dir() {
325 325 let is_ignored =
326 326 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
327 327 let traverse_children = if is_ignored {
328 328 // Descendants of an ignored directory are all ignored
329 329 self.options.list_ignored
330 330 } else {
331 331 // Descendants of an unknown directory may be either unknown or
332 332 // ignored
333 333 self.options.list_unknown || self.options.list_ignored
334 334 };
335 335 if traverse_children {
336 336 let is_at_repo_root = false;
337 337 if let Ok(children_fs_entries) = self.read_dir(
338 338 &hg_path,
339 339 &fs_entry.full_path,
340 340 is_at_repo_root,
341 341 ) {
342 342 children_fs_entries.par_iter().for_each(|child_fs_entry| {
343 343 self.traverse_fs_only(
344 344 is_ignored,
345 345 &hg_path,
346 346 child_fs_entry,
347 347 )
348 348 })
349 349 }
350 350 }
351 351 if self.options.collect_traversed_dirs {
352 352 self.outcome.lock().unwrap().traversed.push(hg_path.into())
353 353 }
354 354 } else if file_or_symlink && self.matcher.matches(&hg_path) {
355 355 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
356 356 }
357 357 }
358 358
359 359 fn mark_unknown_or_ignored(
360 360 &self,
361 361 has_ignored_ancestor: bool,
362 362 hg_path: Cow<'tree, HgPath>,
363 363 ) {
364 364 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
365 365 if is_ignored {
366 366 if self.options.list_ignored {
367 367 self.outcome.lock().unwrap().ignored.push(hg_path)
368 368 }
369 369 } else {
370 370 if self.options.list_unknown {
371 371 self.outcome.lock().unwrap().unknown.push(hg_path)
372 372 }
373 373 }
374 374 }
375 375 }
376 376
377 377 #[cfg(unix)] // TODO
378 378 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
379 379 // Going through `Metadata::modified()` would be portable, but it would go
380 380 // to the trouble of constructing a `SystemTime` value with sub-second
381 381 // precision just for us to throw it away here.
382 382 use std::os::unix::fs::MetadataExt;
383 383 metadata.mtime()
384 384 }
385 385
386 386 struct DirEntry {
387 387 base_name: HgPathBuf,
388 388 full_path: PathBuf,
389 389 metadata: std::fs::Metadata,
390 390 }
391 391
392 392 impl DirEntry {
393 393 /// Returns **unsorted** entries in the given directory, with name and
394 394 /// metadata.
395 395 ///
396 396 /// If a `.hg` sub-directory is encountered:
397 397 ///
398 398 /// * At the repository root, ignore that sub-directory
399 399 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
400 400 /// list instead.
401 401 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
402 402 let mut results = Vec::new();
403 403 for entry in path.read_dir()? {
404 404 let entry = entry?;
405 405 let metadata = entry.metadata()?;
406 406 let name = get_bytes_from_os_string(entry.file_name());
407 407 // FIXME don't do this when cached
408 408 if name == b".hg" {
409 409 if is_at_repo_root {
410 410 // Skip the repo’s own .hg (might be a symlink)
411 411 continue;
412 412 } else if metadata.is_dir() {
413 413 // A .hg sub-directory at another location means a subrepo,
414 414 // skip it entirely.
415 415 return Ok(Vec::new());
416 416 }
417 417 }
418 418 results.push(DirEntry {
419 419 base_name: name.into(),
420 420 full_path: entry.path(),
421 421 metadata,
422 422 })
423 423 }
424 424 Ok(results)
425 425 }
426 426 }
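The core of the traversal above is `itertools::merge_join_by`, which walks two sorted sequences in lockstep and classifies each name as present in the dirstate, on the filesystem, or both. A minimal sketch with plain strings, assuming only the `itertools` dependency that the real status code already uses:

```rust
// Hypothetical file names; in the real code the two sides are sorted
// dirstate child nodes and sorted `DirEntry` results for one directory.
use itertools::EitherOrBoth::*;

fn main() {
    let dirstate = ["a.txt", "b.txt", "d.txt"]; // sorted
    let filesystem = ["b.txt", "c.txt", "d.txt"]; // sorted
    for pair in itertools::merge_join_by(dirstate, filesystem, |d, f| d.cmp(f)) {
        match pair {
            // Present on both sides: compare metadata (clean/modified/unsure).
            Both(name, _) => println!("{name}: in dirstate and on disk"),
            // Only in the dirstate: removed, or deleted from disk.
            Left(name) => println!("{name}: only in dirstate"),
            // Only on disk: unknown or ignored.
            Right(name) => println!("{name}: only on disk"),
        }
    }
}
```

Each pair can then be handled independently, which is why the real code can hand them off to Rayon with `par_bridge()`.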