##// END OF EJS Templates
dirstate-tree: Extract into a method sorting children of a given node...
Simon Sapin -
r48057:ce41ee53 default
parent child Browse files
Show More
@@ -1,654 +1,665 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::convert::TryInto;
5 5 use std::path::PathBuf;
6 6
7 7 use super::on_disk::V2_FORMAT_MARKER;
8 8 use super::path_with_basename::WithBasename;
9 9 use crate::dirstate::parsers::clear_ambiguous_mtime;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::parsers::Timestamp;
14 14 use crate::errors::HgError;
15 15 use crate::matchers::Matcher;
16 16 use crate::utils::hg_path::{HgPath, HgPathBuf};
17 17 use crate::utils::SliceExt;
18 18 use crate::CopyMapIter;
19 19 use crate::DirstateEntry;
20 20 use crate::DirstateError;
21 21 use crate::DirstateMapError;
22 22 use crate::DirstateParents;
23 23 use crate::DirstateStatus;
24 24 use crate::EntryState;
25 25 use crate::FastHashMap;
26 26 use crate::PatternFileWarning;
27 27 use crate::StateMapIter;
28 28 use crate::StatusError;
29 29 use crate::StatusOptions;
30 30
31 31 pub struct DirstateMap<'on_disk> {
32 32 /// Contents of the `.hg/dirstate` file
33 33 on_disk: &'on_disk [u8],
34 34
35 35 pub(super) root: ChildNodes<'on_disk>,
36 36
37 37 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
38 38 nodes_with_entry_count: usize,
39 39
40 40 /// Number of nodes anywhere in the tree that have
41 41 /// `.copy_source.is_some()`.
42 42 nodes_with_copy_source_count: usize,
43 43 }
44 44
45 45 /// Using a plain `HgPathBuf` of the full path from the repository root as a
46 46 /// map key would also work: all paths in a given map have the same parent
47 47 /// path, so comparing full paths gives the same result as comparing base
48 /// names. However `BTreeMap` would waste time always re-comparing the same
48 /// names. However `HashMap` would waste time always re-hashing the same
49 49 /// string prefix.
50 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
50 51 pub(super) type ChildNodes<'on_disk> =
51 FastHashMap<WithBasename<Cow<'on_disk, HgPath>>, Node<'on_disk>>;
52 FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>;
52 53
53 54 /// Represents a file or a directory
54 55 #[derive(Default)]
55 56 pub(super) struct Node<'on_disk> {
56 57 /// `None` for directories
57 58 pub(super) entry: Option<DirstateEntry>,
58 59
59 60 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
60 61
61 62 pub(super) children: ChildNodes<'on_disk>,
62 63
63 64 /// How many (non-inclusive) descendants of this node are tracked files
64 65 tracked_descendants_count: usize,
65 66 }
66 67
67 impl Node<'_> {
68 impl<'on_disk> Node<'on_disk> {
68 69 pub(super) fn state(&self) -> Option<EntryState> {
69 70 self.entry.as_ref().map(|entry| entry.state)
70 71 }
72
73 pub(super) fn sorted<'tree>(
74 nodes: &'tree mut ChildNodes<'on_disk>,
75 ) -> Vec<(&'tree NodeKey<'on_disk>, &'tree mut Self)> {
76 let mut vec: Vec<_> = nodes.iter_mut().collect();
77 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
78 // https://github.com/rust-lang/rust/issues/34162
79 vec.sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
80 vec
81 }
71 82 }
72 83
73 84 /// `(full_path, entry, copy_source)`
74 85 type NodeDataMut<'tree, 'on_disk> = (
75 86 &'tree HgPath,
76 87 &'tree mut Option<DirstateEntry>,
77 88 &'tree mut Option<Cow<'on_disk, HgPath>>,
78 89 );
79 90
80 91 impl<'on_disk> DirstateMap<'on_disk> {
81 92 #[timed]
82 93 pub fn new_v2(
83 94 on_disk: &'on_disk [u8],
84 95 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
85 96 if let Some(rest) = on_disk.drop_prefix(V2_FORMAT_MARKER) {
86 97 Self::new_v1(rest)
87 98 } else if on_disk.is_empty() {
88 99 Self::new_v1(on_disk)
89 100 } else {
90 101 return Err(HgError::corrupted(
91 102 "missing dirstate-v2 magic number",
92 103 )
93 104 .into());
94 105 }
95 106 }
96 107
97 108 #[timed]
98 109 pub fn new_v1(
99 110 on_disk: &'on_disk [u8],
100 111 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
101 112 let mut map = Self {
102 113 on_disk,
103 114 root: ChildNodes::default(),
104 115 nodes_with_entry_count: 0,
105 116 nodes_with_copy_source_count: 0,
106 117 };
107 118 if map.on_disk.is_empty() {
108 119 return Ok((map, None));
109 120 }
110 121
111 122 let parents = parse_dirstate_entries(
112 123 map.on_disk,
113 124 |path, entry, copy_source| {
114 125 let tracked = entry.state.is_tracked();
115 126 let node = Self::get_or_insert_node(
116 127 &mut map.root,
117 128 path,
118 129 WithBasename::to_cow_borrowed,
119 130 |ancestor| {
120 131 if tracked {
121 132 ancestor.tracked_descendants_count += 1
122 133 }
123 134 },
124 135 );
125 136 assert!(
126 137 node.entry.is_none(),
127 138 "duplicate dirstate entry in read"
128 139 );
129 140 assert!(
130 141 node.copy_source.is_none(),
131 142 "duplicate dirstate entry in read"
132 143 );
133 144 node.entry = Some(*entry);
134 145 node.copy_source = copy_source.map(Cow::Borrowed);
135 146 map.nodes_with_entry_count += 1;
136 147 if copy_source.is_some() {
137 148 map.nodes_with_copy_source_count += 1
138 149 }
139 150 },
140 151 )?;
141 152 let parents = Some(parents.clone());
142 153
143 154 Ok((map, parents))
144 155 }
145 156
146 157 fn get_node(&self, path: &HgPath) -> Option<&Node> {
147 158 let mut children = &self.root;
148 159 let mut components = path.components();
149 160 let mut component =
150 161 components.next().expect("expected at least one components");
151 162 loop {
152 163 let child = children.get(component)?;
153 164 if let Some(next_component) = components.next() {
154 165 component = next_component;
155 166 children = &child.children;
156 167 } else {
157 168 return Some(child);
158 169 }
159 170 }
160 171 }
161 172
162 173 /// Returns a mutable reference to the node at `path` if it exists
163 174 ///
164 175 /// This takes `root` instead of `&mut self` so that callers can mutate
165 176 /// other fields while the returned borrow is still valid
166 177 fn get_node_mut<'tree>(
167 178 root: &'tree mut ChildNodes<'on_disk>,
168 179 path: &HgPath,
169 180 ) -> Option<&'tree mut Node<'on_disk>> {
170 181 let mut children = root;
171 182 let mut components = path.components();
172 183 let mut component =
173 184 components.next().expect("expected at least one components");
174 185 loop {
175 186 let child = children.get_mut(component)?;
176 187 if let Some(next_component) = components.next() {
177 188 component = next_component;
178 189 children = &mut child.children;
179 190 } else {
180 191 return Some(child);
181 192 }
182 193 }
183 194 }
184 195
185 196 fn get_or_insert_node<'tree, 'path>(
186 197 root: &'tree mut ChildNodes<'on_disk>,
187 198 path: &'path HgPath,
188 199 to_cow: impl Fn(
189 200 WithBasename<&'path HgPath>,
190 201 ) -> WithBasename<Cow<'on_disk, HgPath>>,
191 202 mut each_ancestor: impl FnMut(&mut Node),
192 203 ) -> &'tree mut Node<'on_disk> {
193 204 let mut child_nodes = root;
194 205 let mut inclusive_ancestor_paths =
195 206 WithBasename::inclusive_ancestors_of(path);
196 207 let mut ancestor_path = inclusive_ancestor_paths
197 208 .next()
198 209 .expect("expected at least one inclusive ancestor");
199 210 loop {
200 211 // TODO: can we avoid allocating an owned key in cases where the
201 212 // map already contains that key, without introducing double
202 213 // lookup?
203 214 let child_node =
204 215 child_nodes.entry(to_cow(ancestor_path)).or_default();
205 216 if let Some(next) = inclusive_ancestor_paths.next() {
206 217 each_ancestor(child_node);
207 218 ancestor_path = next;
208 219 child_nodes = &mut child_node.children;
209 220 } else {
210 221 return child_node;
211 222 }
212 223 }
213 224 }
214 225
215 226 fn add_or_remove_file(
216 227 &mut self,
217 228 path: &HgPath,
218 229 old_state: EntryState,
219 230 new_entry: DirstateEntry,
220 231 ) {
221 232 let tracked_count_increment =
222 233 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
223 234 (false, true) => 1,
224 235 (true, false) => -1,
225 236 _ => 0,
226 237 };
227 238
228 239 let node = Self::get_or_insert_node(
229 240 &mut self.root,
230 241 path,
231 242 WithBasename::to_cow_owned,
232 243 |ancestor| {
233 244 // We can’t use `+= increment` because the counter is unsigned,
234 245 // and we want debug builds to detect accidental underflow
235 246 // through zero
236 247 match tracked_count_increment {
237 248 1 => ancestor.tracked_descendants_count += 1,
238 249 -1 => ancestor.tracked_descendants_count -= 1,
239 250 _ => {}
240 251 }
241 252 },
242 253 );
243 254 if node.entry.is_none() {
244 255 self.nodes_with_entry_count += 1
245 256 }
246 257 node.entry = Some(new_entry)
247 258 }
248 259
249 260 fn iter_nodes<'a>(
250 261 &'a self,
251 262 ) -> impl Iterator<Item = (&'a HgPath, &'a Node)> + 'a {
252 263 // Depth first tree traversal.
253 264 //
254 265 // If we could afford internal iteration and recursion,
255 266 // this would look like:
256 267 //
257 268 // ```
258 269 // fn traverse_children(
259 270 // children: &ChildNodes,
260 271 // each: &mut impl FnMut(&Node),
261 272 // ) {
262 273 // for child in children.values() {
263 274 // traverse_children(&child.children, each);
264 275 // each(child);
265 276 // }
266 277 // }
267 278 // ```
268 279 //
269 280 // However we want an external iterator and therefore can’t use the
270 281 // call stack. Use an explicit stack instead:
271 282 let mut stack = Vec::new();
272 283 let mut iter = self.root.iter();
273 284 std::iter::from_fn(move || {
274 285 while let Some((key, child_node)) = iter.next() {
275 286 // Pseudo-recursion
276 287 let new_iter = child_node.children.iter();
277 288 let old_iter = std::mem::replace(&mut iter, new_iter);
278 289 let key = &**key.full_path();
279 290 stack.push((key, child_node, old_iter));
280 291 }
281 292 // Found the end of a `children.iter()` iterator.
282 293 if let Some((key, child_node, next_iter)) = stack.pop() {
283 294 // "Return" from pseudo-recursion by restoring state from the
284 295 // explicit stack
285 296 iter = next_iter;
286 297
287 298 Some((key, child_node))
288 299 } else {
289 300 // Reached the bottom of the stack, we’re done
290 301 None
291 302 }
292 303 })
293 304 }
294 305
295 306 /// Mutable iterator for the `(entry, copy source)` of each node.
296 307 ///
297 308 /// It would not be safe to yield mutable references to nodes themeselves
298 309 /// with `-> impl Iterator<Item = &mut Node>` since child nodes are
299 310 /// reachable from their ancestor nodes, potentially creating multiple
300 311 /// `&mut` references to a given node.
301 312 fn iter_node_data_mut<'tree>(
302 313 &'tree mut self,
303 314 ) -> impl Iterator<Item = NodeDataMut<'tree, 'on_disk>> + 'tree {
304 315 // Explict stack for pseudo-recursion, see `iter_nodes` above.
305 316 let mut stack = Vec::new();
306 317 let mut iter = self.root.iter_mut();
307 318 std::iter::from_fn(move || {
308 319 while let Some((key, child_node)) = iter.next() {
309 320 // Pseudo-recursion
310 321 let data = (
311 322 &**key.full_path(),
312 323 &mut child_node.entry,
313 324 &mut child_node.copy_source,
314 325 );
315 326 let new_iter = child_node.children.iter_mut();
316 327 let old_iter = std::mem::replace(&mut iter, new_iter);
317 328 stack.push((data, old_iter));
318 329 }
319 330 // Found the end of a `children.values_mut()` iterator.
320 331 if let Some((data, next_iter)) = stack.pop() {
321 332 // "Return" from pseudo-recursion by restoring state from the
322 333 // explicit stack
323 334 iter = next_iter;
324 335
325 336 Some(data)
326 337 } else {
327 338 // Reached the bottom of the stack, we’re done
328 339 None
329 340 }
330 341 })
331 342 }
332 343 }
333 344
334 345 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
335 346 fn clear(&mut self) {
336 347 self.root.clear();
337 348 self.nodes_with_entry_count = 0;
338 349 self.nodes_with_copy_source_count = 0;
339 350 }
340 351
341 352 fn add_file(
342 353 &mut self,
343 354 filename: &HgPath,
344 355 old_state: EntryState,
345 356 entry: DirstateEntry,
346 357 ) -> Result<(), DirstateMapError> {
347 358 self.add_or_remove_file(filename, old_state, entry);
348 359 Ok(())
349 360 }
350 361
351 362 fn remove_file(
352 363 &mut self,
353 364 filename: &HgPath,
354 365 old_state: EntryState,
355 366 size: i32,
356 367 ) -> Result<(), DirstateMapError> {
357 368 let entry = DirstateEntry {
358 369 state: EntryState::Removed,
359 370 mode: 0,
360 371 size,
361 372 mtime: 0,
362 373 };
363 374 self.add_or_remove_file(filename, old_state, entry);
364 375 Ok(())
365 376 }
366 377
367 378 fn drop_file(
368 379 &mut self,
369 380 filename: &HgPath,
370 381 old_state: EntryState,
371 382 ) -> Result<bool, DirstateMapError> {
372 383 struct Dropped {
373 384 was_tracked: bool,
374 385 had_entry: bool,
375 386 had_copy_source: bool,
376 387 }
377 388 fn recur(nodes: &mut ChildNodes, path: &HgPath) -> Option<Dropped> {
378 389 let (first_path_component, rest_of_path) =
379 390 path.split_first_component();
380 391 let node = nodes.get_mut(first_path_component)?;
381 392 let dropped;
382 393 if let Some(rest) = rest_of_path {
383 394 dropped = recur(&mut node.children, rest)?;
384 395 if dropped.was_tracked {
385 396 node.tracked_descendants_count -= 1;
386 397 }
387 398 } else {
388 399 dropped = Dropped {
389 400 was_tracked: node
390 401 .entry
391 402 .as_ref()
392 403 .map_or(false, |entry| entry.state.is_tracked()),
393 404 had_entry: node.entry.take().is_some(),
394 405 had_copy_source: node.copy_source.take().is_some(),
395 406 };
396 407 }
397 408 // After recursion, for both leaf (rest_of_path is None) nodes and
398 409 // parent nodes, remove a node if it just became empty.
399 410 if node.entry.is_none()
400 411 && node.copy_source.is_none()
401 412 && node.children.is_empty()
402 413 {
403 414 nodes.remove(first_path_component);
404 415 }
405 416 Some(dropped)
406 417 }
407 418
408 419 if let Some(dropped) = recur(&mut self.root, filename) {
409 420 if dropped.had_entry {
410 421 self.nodes_with_entry_count -= 1
411 422 }
412 423 if dropped.had_copy_source {
413 424 self.nodes_with_copy_source_count -= 1
414 425 }
415 426 Ok(dropped.had_entry)
416 427 } else {
417 428 debug_assert!(!old_state.is_tracked());
418 429 Ok(false)
419 430 }
420 431 }
421 432
422 433 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
423 434 for filename in filenames {
424 435 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
425 436 if let Some(entry) = node.entry.as_mut() {
426 437 clear_ambiguous_mtime(entry, now);
427 438 }
428 439 }
429 440 }
430 441 }
431 442
432 443 fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool {
433 444 self.get_node(key)
434 445 .and_then(|node| node.entry.as_ref())
435 446 .map_or(false, DirstateEntry::is_non_normal)
436 447 }
437 448
438 449 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
439 450 // Do nothing, this `DirstateMap` does not have a separate "non normal
440 451 // entries" set that need to be kept up to date
441 452 }
442 453
443 454 fn non_normal_or_other_parent_paths(
444 455 &mut self,
445 456 ) -> Box<dyn Iterator<Item = &HgPath> + '_> {
446 457 Box::new(self.iter_nodes().filter_map(|(path, node)| {
447 458 node.entry
448 459 .as_ref()
449 460 .filter(|entry| {
450 461 entry.is_non_normal() || entry.is_from_other_parent()
451 462 })
452 463 .map(|_| path)
453 464 }))
454 465 }
455 466
456 467 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
457 468 // Do nothing, this `DirstateMap` does not have a separate "non normal
458 469 // entries" and "from other parent" sets that need to be recomputed
459 470 }
460 471
461 472 fn iter_non_normal_paths(
462 473 &mut self,
463 474 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
464 475 self.iter_non_normal_paths_panic()
465 476 }
466 477
467 478 fn iter_non_normal_paths_panic(
468 479 &self,
469 480 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
470 481 Box::new(self.iter_nodes().filter_map(|(path, node)| {
471 482 node.entry
472 483 .as_ref()
473 484 .filter(|entry| entry.is_non_normal())
474 485 .map(|_| path)
475 486 }))
476 487 }
477 488
478 489 fn iter_other_parent_paths(
479 490 &mut self,
480 491 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
481 492 Box::new(self.iter_nodes().filter_map(|(path, node)| {
482 493 node.entry
483 494 .as_ref()
484 495 .filter(|entry| entry.is_from_other_parent())
485 496 .map(|_| path)
486 497 }))
487 498 }
488 499
489 500 fn has_tracked_dir(
490 501 &mut self,
491 502 directory: &HgPath,
492 503 ) -> Result<bool, DirstateMapError> {
493 504 if let Some(node) = self.get_node(directory) {
494 505 // A node without a `DirstateEntry` was created to hold child
495 506 // nodes, and is therefore a directory.
496 507 Ok(node.entry.is_none() && node.tracked_descendants_count > 0)
497 508 } else {
498 509 Ok(false)
499 510 }
500 511 }
501 512
502 513 fn has_dir(
503 514 &mut self,
504 515 directory: &HgPath,
505 516 ) -> Result<bool, DirstateMapError> {
506 517 if let Some(node) = self.get_node(directory) {
507 518 // A node without a `DirstateEntry` was created to hold child
508 519 // nodes, and is therefore a directory.
509 520 Ok(node.entry.is_none())
510 521 } else {
511 522 Ok(false)
512 523 }
513 524 }
514 525
515 526 #[timed]
516 527 fn pack_v1(
517 528 &mut self,
518 529 parents: DirstateParents,
519 530 now: Timestamp,
520 531 ) -> Result<Vec<u8>, DirstateError> {
521 532 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
522 533 // reallocations
523 534 let mut size = parents.as_bytes().len();
524 535 for (path, node) in self.iter_nodes() {
525 536 if node.entry.is_some() {
526 537 size += packed_entry_size(
527 538 path,
528 539 node.copy_source.as_ref().map(|p| &**p),
529 540 )
530 541 }
531 542 }
532 543
533 544 let mut packed = Vec::with_capacity(size);
534 545 packed.extend(parents.as_bytes());
535 546
536 547 let now: i32 = now.0.try_into().expect("time overflow");
537 548 for (path, opt_entry, copy_source) in self.iter_node_data_mut() {
538 549 if let Some(entry) = opt_entry {
539 550 clear_ambiguous_mtime(entry, now);
540 551 pack_entry(
541 552 path,
542 553 entry,
543 554 copy_source.as_ref().map(|p| &**p),
544 555 &mut packed,
545 556 );
546 557 }
547 558 }
548 559 Ok(packed)
549 560 }
550 561
551 562 #[timed]
552 563 fn pack_v2(
553 564 &mut self,
554 565 parents: DirstateParents,
555 566 now: Timestamp,
556 567 ) -> Result<Vec<u8>, DirstateError> {
557 568 // Inefficient but temporary
558 569 let mut v2 = V2_FORMAT_MARKER.to_vec();
559 570 v2.append(&mut self.pack_v1(parents, now)?);
560 571 Ok(v2)
561 572 }
562 573
563 574 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
564 575 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
565 576 // needs to be recomputed
566 577 Ok(())
567 578 }
568 579
569 580 fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
570 581 // Do nothing, this `DirstateMap` does not a separate `dirs` that needs
571 582 // to be recomputed
572 583 Ok(())
573 584 }
574 585
575 586 fn status<'a>(
576 587 &'a mut self,
577 588 matcher: &'a (dyn Matcher + Sync),
578 589 root_dir: PathBuf,
579 590 ignore_files: Vec<PathBuf>,
580 591 options: StatusOptions,
581 592 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
582 593 {
583 594 super::status::status(self, matcher, root_dir, ignore_files, options)
584 595 }
585 596
586 597 fn copy_map_len(&self) -> usize {
587 598 self.nodes_with_copy_source_count
588 599 }
589 600
590 601 fn copy_map_iter(&self) -> CopyMapIter<'_> {
591 602 Box::new(self.iter_nodes().filter_map(|(path, node)| {
592 603 node.copy_source
593 604 .as_ref()
594 605 .map(|copy_source| (path, &**copy_source))
595 606 }))
596 607 }
597 608
598 609 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
599 610 if let Some(node) = self.get_node(key) {
600 611 node.copy_source.is_some()
601 612 } else {
602 613 false
603 614 }
604 615 }
605 616
606 617 fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> {
607 618 self.get_node(key)?.copy_source.as_ref().map(|p| &**p)
608 619 }
609 620
610 621 fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> {
611 622 let count = &mut self.nodes_with_copy_source_count;
612 623 Self::get_node_mut(&mut self.root, key).and_then(|node| {
613 624 if node.copy_source.is_some() {
614 625 *count -= 1
615 626 }
616 627 node.copy_source.take().map(Cow::into_owned)
617 628 })
618 629 }
619 630
620 631 fn copy_map_insert(
621 632 &mut self,
622 633 key: HgPathBuf,
623 634 value: HgPathBuf,
624 635 ) -> Option<HgPathBuf> {
625 636 let node = Self::get_or_insert_node(
626 637 &mut self.root,
627 638 &key,
628 639 WithBasename::to_cow_owned,
629 640 |_ancestor| {},
630 641 );
631 642 if node.copy_source.is_none() {
632 643 self.nodes_with_copy_source_count += 1
633 644 }
634 645 node.copy_source.replace(value.into()).map(Cow::into_owned)
635 646 }
636 647
637 648 fn len(&self) -> usize {
638 649 self.nodes_with_entry_count
639 650 }
640 651
641 652 fn contains_key(&self, key: &HgPath) -> bool {
642 653 self.get(key).is_some()
643 654 }
644 655
645 656 fn get(&self, key: &HgPath) -> Option<&DirstateEntry> {
646 657 self.get_node(key)?.entry.as_ref()
647 658 }
648 659
649 660 fn iter(&self) -> StateMapIter<'_> {
650 661 Box::new(self.iter_nodes().filter_map(|(path, node)| {
651 662 node.entry.as_ref().map(|entry| (path, entry))
652 663 }))
653 664 }
654 665 }
@@ -1,428 +1,426 b''
1 1 use crate::dirstate::status::IgnoreFnType;
2 2 use crate::dirstate_tree::dirstate_map::ChildNodes;
3 3 use crate::dirstate_tree::dirstate_map::DirstateMap;
4 4 use crate::dirstate_tree::dirstate_map::Node;
5 5 use crate::matchers::get_ignore_function;
6 6 use crate::matchers::Matcher;
7 7 use crate::utils::files::get_bytes_from_os_string;
8 8 use crate::utils::hg_path::HgPath;
9 9 use crate::BadMatch;
10 10 use crate::DirstateStatus;
11 11 use crate::EntryState;
12 12 use crate::HgPathBuf;
13 13 use crate::PatternFileWarning;
14 14 use crate::StatusError;
15 15 use crate::StatusOptions;
16 16 use micro_timer::timed;
17 17 use rayon::prelude::*;
18 18 use std::borrow::Cow;
19 19 use std::io;
20 20 use std::path::Path;
21 21 use std::path::PathBuf;
22 22 use std::sync::Mutex;
23 23
24 24 /// Returns the status of the working directory compared to its parent
25 25 /// changeset.
26 26 ///
27 27 /// This algorithm is based on traversing the filesystem tree (`fs` in function
28 28 /// and variable names) and dirstate tree at the same time. The core of this
29 29 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
30 30 /// and its use of `itertools::merge_join_by`. When reaching a path that only
31 31 /// exists in one of the two trees, depending on information requested by
32 32 /// `options` we may need to traverse the remaining subtree.
33 33 #[timed]
34 34 pub fn status<'tree>(
35 35 dmap: &'tree mut DirstateMap,
36 36 matcher: &(dyn Matcher + Sync),
37 37 root_dir: PathBuf,
38 38 ignore_files: Vec<PathBuf>,
39 39 options: StatusOptions,
40 40 ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
41 41 let (ignore_fn, warnings): (IgnoreFnType, _) =
42 42 if options.list_ignored || options.list_unknown {
43 43 get_ignore_function(ignore_files, &root_dir)?
44 44 } else {
45 45 (Box::new(|&_| true), vec![])
46 46 };
47 47
48 48 let common = StatusCommon {
49 49 options,
50 50 matcher,
51 51 ignore_fn,
52 52 outcome: Mutex::new(DirstateStatus::default()),
53 53 };
54 54 let is_at_repo_root = true;
55 55 let hg_path = HgPath::new("");
56 56 let has_ignored_ancestor = false;
57 57 common.traverse_fs_directory_and_dirstate(
58 58 has_ignored_ancestor,
59 59 &mut dmap.root,
60 60 hg_path,
61 61 &root_dir,
62 62 is_at_repo_root,
63 63 );
64 64 Ok((common.outcome.into_inner().unwrap(), warnings))
65 65 }
66 66
67 67 /// Bag of random things needed by various parts of the algorithm. Reduces the
68 68 /// number of parameters passed to functions.
69 69 struct StatusCommon<'tree, 'a> {
70 70 options: StatusOptions,
71 71 matcher: &'a (dyn Matcher + Sync),
72 72 ignore_fn: IgnoreFnType<'a>,
73 73 outcome: Mutex<DirstateStatus<'tree>>,
74 74 }
75 75
76 76 impl<'tree, 'a> StatusCommon<'tree, 'a> {
77 77 fn read_dir(
78 78 &self,
79 79 hg_path: &HgPath,
80 80 fs_path: &Path,
81 81 is_at_repo_root: bool,
82 82 ) -> Result<Vec<DirEntry>, ()> {
83 83 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
84 84 let errno = error.raw_os_error().expect("expected real OS error");
85 85 self.outcome
86 86 .lock()
87 87 .unwrap()
88 88 .bad
89 89 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
90 90 })
91 91 }
92 92
93 93 fn traverse_fs_directory_and_dirstate(
94 94 &self,
95 95 has_ignored_ancestor: bool,
96 96 dirstate_nodes: &'tree mut ChildNodes,
97 97 directory_hg_path: &'tree HgPath,
98 98 directory_fs_path: &Path,
99 99 is_at_repo_root: bool,
100 100 ) {
101 101 let mut fs_entries = if let Ok(entries) = self.read_dir(
102 102 directory_hg_path,
103 103 directory_fs_path,
104 104 is_at_repo_root,
105 105 ) {
106 106 entries
107 107 } else {
108 108 return;
109 109 };
110 110
111 111 // `merge_join_by` requires both its input iterators to be sorted:
112 112
113 let mut dirstate_nodes: Vec<_> = dirstate_nodes.iter_mut().collect();
113 let dirstate_nodes = Node::sorted(dirstate_nodes);
114 114 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
115 115 // https://github.com/rust-lang/rust/issues/34162
116 dirstate_nodes
117 .sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
118 116 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
119 117
120 118 itertools::merge_join_by(
121 119 dirstate_nodes,
122 120 &fs_entries,
123 121 |(full_path, _node), fs_entry| {
124 122 full_path.base_name().cmp(&fs_entry.base_name)
125 123 },
126 124 )
127 125 .par_bridge()
128 126 .for_each(|pair| {
129 127 use itertools::EitherOrBoth::*;
130 128 match pair {
131 129 Both((hg_path, dirstate_node), fs_entry) => {
132 130 self.traverse_fs_and_dirstate(
133 131 fs_entry,
134 132 hg_path.full_path(),
135 133 dirstate_node,
136 134 has_ignored_ancestor,
137 135 );
138 136 }
139 137 Left((hg_path, dirstate_node)) => self.traverse_dirstate_only(
140 138 hg_path.full_path(),
141 139 dirstate_node,
142 140 ),
143 141 Right(fs_entry) => self.traverse_fs_only(
144 142 has_ignored_ancestor,
145 143 directory_hg_path,
146 144 fs_entry,
147 145 ),
148 146 }
149 147 })
150 148 }
151 149
152 150 fn traverse_fs_and_dirstate(
153 151 &self,
154 152 fs_entry: &DirEntry,
155 153 hg_path: &'tree HgPath,
156 154 dirstate_node: &'tree mut Node,
157 155 has_ignored_ancestor: bool,
158 156 ) {
159 157 let file_type = fs_entry.metadata.file_type();
160 158 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
161 159 if !file_or_symlink {
162 160 // If we previously had a file here, it was removed (with
163 161 // `hg rm` or similar) or deleted before it could be
164 162 // replaced by a directory or something else.
165 163 self.mark_removed_or_deleted_if_file(
166 164 hg_path,
167 165 dirstate_node.state(),
168 166 );
169 167 }
170 168 if file_type.is_dir() {
171 169 if self.options.collect_traversed_dirs {
172 170 self.outcome.lock().unwrap().traversed.push(hg_path.into())
173 171 }
174 172 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
175 173 let is_at_repo_root = false;
176 174 self.traverse_fs_directory_and_dirstate(
177 175 is_ignored,
178 176 &mut dirstate_node.children,
179 177 hg_path,
180 178 &fs_entry.full_path,
181 179 is_at_repo_root,
182 180 );
183 181 } else {
184 182 if file_or_symlink && self.matcher.matches(hg_path) {
185 183 let full_path = Cow::from(hg_path);
186 184 if let Some(entry) = &dirstate_node.entry {
187 185 match entry.state {
188 186 EntryState::Added => {
189 187 self.outcome.lock().unwrap().added.push(full_path)
190 188 }
191 189 EntryState::Removed => self
192 190 .outcome
193 191 .lock()
194 192 .unwrap()
195 193 .removed
196 194 .push(full_path),
197 195 EntryState::Merged => self
198 196 .outcome
199 197 .lock()
200 198 .unwrap()
201 199 .modified
202 200 .push(full_path),
203 201 EntryState::Normal => {
204 202 self.handle_normal_file(
205 203 full_path,
206 204 dirstate_node,
207 205 entry,
208 206 fs_entry,
209 207 );
210 208 }
211 209 // This variant is not used in DirstateMap
212 210 // nodes
213 211 EntryState::Unknown => unreachable!(),
214 212 }
215 213 } else {
216 214 // `node.entry.is_none()` indicates a "directory"
217 215 // node, but the filesystem has a file
218 216 self.mark_unknown_or_ignored(
219 217 has_ignored_ancestor,
220 218 full_path,
221 219 )
222 220 }
223 221 }
224 222
225 223 for (child_hg_path, child_node) in &mut dirstate_node.children {
226 224 self.traverse_dirstate_only(
227 225 child_hg_path.full_path(),
228 226 child_node,
229 227 )
230 228 }
231 229 }
232 230 }
233 231
234 232 /// A file with `EntryState::Normal` in the dirstate was found in the
235 233 /// filesystem
236 234 fn handle_normal_file(
237 235 &self,
238 236 full_path: Cow<'tree, HgPath>,
239 237 dirstate_node: &Node,
240 238 entry: &crate::DirstateEntry,
241 239 fs_entry: &DirEntry,
242 240 ) {
243 241 // Keep the low 31 bits
244 242 fn truncate_u64(value: u64) -> i32 {
245 243 (value & 0x7FFF_FFFF) as i32
246 244 }
247 245 fn truncate_i64(value: i64) -> i32 {
248 246 (value & 0x7FFF_FFFF) as i32
249 247 }
250 248
251 249 let mode_changed = || {
252 250 self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
253 251 };
254 252 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
255 253 if entry.size >= 0
256 254 && size_changed
257 255 && fs_entry.metadata.file_type().is_symlink()
258 256 {
259 257 // issue6456: Size returned may be longer due to encryption
260 258 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
261 259 self.outcome.lock().unwrap().unsure.push(full_path)
262 260 } else if dirstate_node.copy_source.is_some()
263 261 || entry.is_from_other_parent()
264 262 || (entry.size >= 0 && (size_changed || mode_changed()))
265 263 {
266 264 self.outcome.lock().unwrap().modified.push(full_path)
267 265 } else {
268 266 let mtime = mtime_seconds(&fs_entry.metadata);
269 267 if truncate_i64(mtime) != entry.mtime
270 268 || mtime == self.options.last_normal_time
271 269 {
272 270 self.outcome.lock().unwrap().unsure.push(full_path)
273 271 } else if self.options.list_clean {
274 272 self.outcome.lock().unwrap().clean.push(full_path)
275 273 }
276 274 }
277 275 }
278 276
279 277 /// A node in the dirstate tree has no corresponding filesystem entry
280 278 fn traverse_dirstate_only(
281 279 &self,
282 280 hg_path: &'tree HgPath,
283 281 dirstate_node: &'tree mut Node,
284 282 ) {
285 283 self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state());
286 284 dirstate_node.children.par_iter_mut().for_each(
287 285 |(child_hg_path, child_node)| {
288 286 self.traverse_dirstate_only(
289 287 child_hg_path.full_path(),
290 288 child_node,
291 289 )
292 290 },
293 291 )
294 292 }
295 293
296 294 /// A node in the dirstate tree has no corresponding *file* on the
297 295 /// filesystem
298 296 ///
299 297 /// Does nothing on a "directory" node
300 298 fn mark_removed_or_deleted_if_file(
301 299 &self,
302 300 hg_path: &'tree HgPath,
303 301 dirstate_node_state: Option<EntryState>,
304 302 ) {
305 303 if let Some(state) = dirstate_node_state {
306 304 if self.matcher.matches(hg_path) {
307 305 if let EntryState::Removed = state {
308 306 self.outcome.lock().unwrap().removed.push(hg_path.into())
309 307 } else {
310 308 self.outcome.lock().unwrap().deleted.push(hg_path.into())
311 309 }
312 310 }
313 311 }
314 312 }
315 313
316 314 /// Something in the filesystem has no corresponding dirstate node
317 315 fn traverse_fs_only(
318 316 &self,
319 317 has_ignored_ancestor: bool,
320 318 directory_hg_path: &HgPath,
321 319 fs_entry: &DirEntry,
322 320 ) {
323 321 let hg_path = directory_hg_path.join(&fs_entry.base_name);
324 322 let file_type = fs_entry.metadata.file_type();
325 323 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
326 324 if file_type.is_dir() {
327 325 let is_ignored =
328 326 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
329 327 let traverse_children = if is_ignored {
330 328 // Descendants of an ignored directory are all ignored
331 329 self.options.list_ignored
332 330 } else {
333 331 // Descendants of an unknown directory may be either unknown or
334 332 // ignored
335 333 self.options.list_unknown || self.options.list_ignored
336 334 };
337 335 if traverse_children {
338 336 let is_at_repo_root = false;
339 337 if let Ok(children_fs_entries) = self.read_dir(
340 338 &hg_path,
341 339 &fs_entry.full_path,
342 340 is_at_repo_root,
343 341 ) {
344 342 children_fs_entries.par_iter().for_each(|child_fs_entry| {
345 343 self.traverse_fs_only(
346 344 is_ignored,
347 345 &hg_path,
348 346 child_fs_entry,
349 347 )
350 348 })
351 349 }
352 350 }
353 351 if self.options.collect_traversed_dirs {
354 352 self.outcome.lock().unwrap().traversed.push(hg_path.into())
355 353 }
356 354 } else if file_or_symlink && self.matcher.matches(&hg_path) {
357 355 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
358 356 }
359 357 }
360 358
361 359 fn mark_unknown_or_ignored(
362 360 &self,
363 361 has_ignored_ancestor: bool,
364 362 hg_path: Cow<'tree, HgPath>,
365 363 ) {
366 364 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
367 365 if is_ignored {
368 366 if self.options.list_ignored {
369 367 self.outcome.lock().unwrap().ignored.push(hg_path)
370 368 }
371 369 } else {
372 370 if self.options.list_unknown {
373 371 self.outcome.lock().unwrap().unknown.push(hg_path)
374 372 }
375 373 }
376 374 }
377 375 }
378 376
379 377 #[cfg(unix)] // TODO
380 378 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
381 379 // Going through `Metadata::modified()` would be portable, but would take
382 380 // care to construct a `SystemTime` value with sub-second precision just
383 381 // for us to throw that away here.
384 382 use std::os::unix::fs::MetadataExt;
385 383 metadata.mtime()
386 384 }
387 385
388 386 struct DirEntry {
389 387 base_name: HgPathBuf,
390 388 full_path: PathBuf,
391 389 metadata: std::fs::Metadata,
392 390 }
393 391
394 392 impl DirEntry {
395 393 /// Returns **unsorted** entries in the given directory, with name and
396 394 /// metadata.
397 395 ///
398 396 /// If a `.hg` sub-directory is encountered:
399 397 ///
400 398 /// * At the repository root, ignore that sub-directory
401 399 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
402 400 /// list instead.
403 401 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
404 402 let mut results = Vec::new();
405 403 for entry in path.read_dir()? {
406 404 let entry = entry?;
407 405 let metadata = entry.metadata()?;
408 406 let name = get_bytes_from_os_string(entry.file_name());
409 407 // FIXME don't do this when cached
410 408 if name == b".hg" {
411 409 if is_at_repo_root {
412 410 // Skip the repo’s own .hg (might be a symlink)
413 411 continue;
414 412 } else if metadata.is_dir() {
415 413 // A .hg sub-directory at another location means a subrepo,
416 414 // skip it entirely.
417 415 return Ok(Vec::new());
418 416 }
419 417 }
420 418 results.push(DirEntry {
421 419 base_name: name.into(),
422 420 full_path: entry.path(),
423 421 metadata,
424 422 })
425 423 }
426 424 Ok(results)
427 425 }
428 426 }
General Comments 0
You need to be logged in to leave comments. Login now