##// END OF EJS Templates
dirstate-v2: Make the dirstate bytes buffer available in more places...
Simon Sapin -
r48127:8d0260d0 default
parent child Browse files
Show More
@@ -1,909 +1,932 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::convert::TryInto;
5 5 use std::path::PathBuf;
6 6
7 7 use super::on_disk;
8 8 use super::on_disk::DirstateV2ParseError;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::parsers::Timestamp;
14 14 use crate::matchers::Matcher;
15 15 use crate::utils::hg_path::{HgPath, HgPathBuf};
16 16 use crate::CopyMapIter;
17 17 use crate::DirstateEntry;
18 18 use crate::DirstateError;
19 19 use crate::DirstateParents;
20 20 use crate::DirstateStatus;
21 21 use crate::EntryState;
22 22 use crate::FastHashMap;
23 23 use crate::PatternFileWarning;
24 24 use crate::StateMapIter;
25 25 use crate::StatusError;
26 26 use crate::StatusOptions;
27 27
28 28 pub struct DirstateMap<'on_disk> {
29 29 /// Contents of the `.hg/dirstate` file
30 30 pub(super) on_disk: &'on_disk [u8],
31 31
32 32 pub(super) root: ChildNodes<'on_disk>,
33 33
34 34 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
35 35 pub(super) nodes_with_entry_count: u32,
36 36
37 37 /// Number of nodes anywhere in the tree that have
38 38 /// `.copy_source.is_some()`.
39 39 pub(super) nodes_with_copy_source_count: u32,
40 40 }
41 41
42 42 /// Using a plain `HgPathBuf` of the full path from the repository root as a
43 43 /// map key would also work: all paths in a given map have the same parent
44 44 /// path, so comparing full paths gives the same result as comparing base
45 45 /// names. However `HashMap` would waste time always re-hashing the same
46 46 /// string prefix.
47 47 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
48 48
49 49 pub(super) enum ChildNodes<'on_disk> {
50 50 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
51 51 }
52 52
53 53 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
54 54 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
55 55 }
56 56
57 57 pub(super) enum NodeRef<'tree, 'on_disk> {
58 58 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
59 59 }
60 60
61 61 impl Default for ChildNodes<'_> {
62 62 fn default() -> Self {
63 63 ChildNodes::InMemory(Default::default())
64 64 }
65 65 }
66 66
67 67 impl<'on_disk> ChildNodes<'on_disk> {
68 68 pub(super) fn as_ref<'tree>(
69 69 &'tree self,
70 70 ) -> ChildNodesRef<'tree, 'on_disk> {
71 71 match self {
72 72 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
73 73 }
74 74 }
75 75
76 76 pub(super) fn is_empty(&self) -> bool {
77 77 match self {
78 78 ChildNodes::InMemory(nodes) => nodes.is_empty(),
79 79 }
80 80 }
81 81
82 82 pub(super) fn make_mut(
83 83 &mut self,
84 _on_disk: &'on_disk [u8],
84 85 ) -> Result<
85 86 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
86 87 DirstateV2ParseError,
87 88 > {
88 89 match self {
89 90 ChildNodes::InMemory(nodes) => Ok(nodes),
90 91 }
91 92 }
92 93 }
93 94
94 95 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
95 96 pub(super) fn get(
96 97 &self,
97 98 base_name: &HgPath,
99 _on_disk: &'on_disk [u8],
98 100 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
99 101 match self {
100 102 ChildNodesRef::InMemory(nodes) => Ok(nodes
101 103 .get_key_value(base_name)
102 104 .map(|(k, v)| NodeRef::InMemory(k, v))),
103 105 }
104 106 }
105 107
106 108 /// Iterate in undefined order
107 109 pub(super) fn iter(
108 110 &self,
109 111 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
110 112 match self {
111 113 ChildNodesRef::InMemory(nodes) => {
112 114 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v))
113 115 }
114 116 }
115 117 }
116 118
117 119 /// Iterate in parallel in undefined order
118 120 pub(super) fn par_iter(
119 121 &self,
120 122 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
121 123 {
122 124 use rayon::prelude::*;
123 125 match self {
124 126 ChildNodesRef::InMemory(nodes) => {
125 127 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v))
126 128 }
127 129 }
128 130 }
129 131
130 132 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
131 133 match self {
132 134 ChildNodesRef::InMemory(nodes) => {
133 135 let mut vec: Vec<_> = nodes
134 136 .iter()
135 137 .map(|(k, v)| NodeRef::InMemory(k, v))
136 138 .collect();
137 139 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
138 140 match node {
139 141 NodeRef::InMemory(path, _node) => path.base_name(),
140 142 }
141 143 }
142 144 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
143 145 // value: https://github.com/rust-lang/rust/issues/34162
144 146 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
145 147 vec
146 148 }
147 149 }
148 150 }
149 151 }
150 152
151 153 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
152 154 pub(super) fn full_path(
153 155 &self,
156 _on_disk: &'on_disk [u8],
154 157 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
155 158 match self {
156 159 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
157 160 }
158 161 }
159 162
160 163 /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree
161 164 pub(super) fn full_path_cow(
162 165 &self,
166 _on_disk: &'on_disk [u8],
163 167 ) -> Result<Cow<'on_disk, HgPath>, DirstateV2ParseError> {
164 168 match self {
165 169 NodeRef::InMemory(path, _node) => Ok(path.full_path().clone()),
166 170 }
167 171 }
168 172
169 173 pub(super) fn base_name(
170 174 &self,
175 _on_disk: &'on_disk [u8],
171 176 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
172 177 match self {
173 178 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
174 179 }
175 180 }
176 181
177 182 pub(super) fn children(
178 183 &self,
184 _on_disk: &'on_disk [u8],
179 185 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
180 186 match self {
181 187 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
182 188 }
183 189 }
184 190
185 191 pub(super) fn has_copy_source(&self) -> bool {
186 192 match self {
187 193 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
188 194 }
189 195 }
190 196
191 197 pub(super) fn copy_source(
192 198 &self,
199 _on_disk: &'on_disk [u8],
193 200 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
194 201 match self {
195 202 NodeRef::InMemory(_path, node) => {
196 203 Ok(node.copy_source.as_ref().map(|s| &**s))
197 204 }
198 205 }
199 206 }
200 207
201 208 pub(super) fn has_entry(&self) -> bool {
202 209 match self {
203 210 NodeRef::InMemory(_path, node) => node.entry.is_some(),
204 211 }
205 212 }
206 213
207 214 pub(super) fn entry(
208 215 &self,
209 216 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
210 217 match self {
211 218 NodeRef::InMemory(_path, node) => Ok(node.entry),
212 219 }
213 220 }
214 221
215 222 pub(super) fn state(
216 223 &self,
217 224 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
218 225 match self {
219 226 NodeRef::InMemory(_path, node) => {
220 227 Ok(node.entry.as_ref().map(|entry| entry.state))
221 228 }
222 229 }
223 230 }
224 231
225 232 pub(super) fn tracked_descendants_count(&self) -> u32 {
226 233 match self {
227 234 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
228 235 }
229 236 }
230 237 }
231 238
232 239 /// Represents a file or a directory
233 240 #[derive(Default)]
234 241 pub(super) struct Node<'on_disk> {
235 242 /// `None` for directories
236 243 pub(super) entry: Option<DirstateEntry>,
237 244
238 245 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
239 246
240 247 pub(super) children: ChildNodes<'on_disk>,
241 248
242 249 /// How many (non-inclusive) descendants of this node are tracked files
243 250 pub(super) tracked_descendants_count: u32,
244 251 }
245 252
246 253 impl<'on_disk> DirstateMap<'on_disk> {
247 254 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
248 255 Self {
249 256 on_disk,
250 257 root: ChildNodes::default(),
251 258 nodes_with_entry_count: 0,
252 259 nodes_with_copy_source_count: 0,
253 260 }
254 261 }
255 262
256 263 #[timed]
257 264 pub fn new_v2(
258 265 on_disk: &'on_disk [u8],
259 266 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
260 267 Ok(on_disk::read(on_disk)?)
261 268 }
262 269
263 270 #[timed]
264 271 pub fn new_v1(
265 272 on_disk: &'on_disk [u8],
266 273 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
267 274 let mut map = Self::empty(on_disk);
268 275 if map.on_disk.is_empty() {
269 276 return Ok((map, None));
270 277 }
271 278
272 279 let parents = parse_dirstate_entries(
273 280 map.on_disk,
274 281 |path, entry, copy_source| {
275 282 let tracked = entry.state.is_tracked();
276 283 let node = Self::get_or_insert_node(
284 map.on_disk,
277 285 &mut map.root,
278 286 path,
279 287 WithBasename::to_cow_borrowed,
280 288 |ancestor| {
281 289 if tracked {
282 290 ancestor.tracked_descendants_count += 1
283 291 }
284 292 },
285 293 )?;
286 294 assert!(
287 295 node.entry.is_none(),
288 296 "duplicate dirstate entry in read"
289 297 );
290 298 assert!(
291 299 node.copy_source.is_none(),
292 300 "duplicate dirstate entry in read"
293 301 );
294 302 node.entry = Some(*entry);
295 303 node.copy_source = copy_source.map(Cow::Borrowed);
296 304 map.nodes_with_entry_count += 1;
297 305 if copy_source.is_some() {
298 306 map.nodes_with_copy_source_count += 1
299 307 }
300 308 Ok(())
301 309 },
302 310 )?;
303 311 let parents = Some(parents.clone());
304 312
305 313 Ok((map, parents))
306 314 }
307 315
308 316 fn get_node<'tree>(
309 317 &'tree self,
310 318 path: &HgPath,
311 319 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
312 320 let mut children = self.root.as_ref();
313 321 let mut components = path.components();
314 322 let mut component =
315 323 components.next().expect("expected at least one components");
316 324 loop {
317 if let Some(child) = children.get(component)? {
325 if let Some(child) = children.get(component, self.on_disk)? {
318 326 if let Some(next_component) = components.next() {
319 327 component = next_component;
320 children = child.children()?;
328 children = child.children(self.on_disk)?;
321 329 } else {
322 330 return Ok(Some(child));
323 331 }
324 332 } else {
325 333 return Ok(None);
326 334 }
327 335 }
328 336 }
329 337
330 338 /// Returns a mutable reference to the node at `path` if it exists
331 339 ///
332 340 /// This takes `root` instead of `&mut self` so that callers can mutate
333 341 /// other fields while the returned borrow is still valid
334 342 fn get_node_mut<'tree>(
343 on_disk: &'on_disk [u8],
335 344 root: &'tree mut ChildNodes<'on_disk>,
336 345 path: &HgPath,
337 346 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
338 347 let mut children = root;
339 348 let mut components = path.components();
340 349 let mut component =
341 350 components.next().expect("expected at least one components");
342 351 loop {
343 if let Some(child) = children.make_mut()?.get_mut(component) {
352 if let Some(child) = children.make_mut(on_disk)?.get_mut(component)
353 {
344 354 if let Some(next_component) = components.next() {
345 355 component = next_component;
346 356 children = &mut child.children;
347 357 } else {
348 358 return Ok(Some(child));
349 359 }
350 360 } else {
351 361 return Ok(None);
352 362 }
353 363 }
354 364 }
355 365
356 366 fn get_or_insert_node<'tree, 'path>(
367 on_disk: &'on_disk [u8],
357 368 root: &'tree mut ChildNodes<'on_disk>,
358 369 path: &'path HgPath,
359 370 to_cow: impl Fn(
360 371 WithBasename<&'path HgPath>,
361 372 ) -> WithBasename<Cow<'on_disk, HgPath>>,
362 373 mut each_ancestor: impl FnMut(&mut Node),
363 374 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
364 375 let mut child_nodes = root;
365 376 let mut inclusive_ancestor_paths =
366 377 WithBasename::inclusive_ancestors_of(path);
367 378 let mut ancestor_path = inclusive_ancestor_paths
368 379 .next()
369 380 .expect("expected at least one inclusive ancestor");
370 381 loop {
371 382 // TODO: can we avoid allocating an owned key in cases where the
372 383 // map already contains that key, without introducing double
373 384 // lookup?
374 385 let child_node = child_nodes
375 .make_mut()?
386 .make_mut(on_disk)?
376 387 .entry(to_cow(ancestor_path))
377 388 .or_default();
378 389 if let Some(next) = inclusive_ancestor_paths.next() {
379 390 each_ancestor(child_node);
380 391 ancestor_path = next;
381 392 child_nodes = &mut child_node.children;
382 393 } else {
383 394 return Ok(child_node);
384 395 }
385 396 }
386 397 }
387 398
388 399 fn add_or_remove_file(
389 400 &mut self,
390 401 path: &HgPath,
391 402 old_state: EntryState,
392 403 new_entry: DirstateEntry,
393 404 ) -> Result<(), DirstateV2ParseError> {
394 405 let tracked_count_increment =
395 406 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
396 407 (false, true) => 1,
397 408 (true, false) => -1,
398 409 _ => 0,
399 410 };
400 411
401 412 let node = Self::get_or_insert_node(
413 self.on_disk,
402 414 &mut self.root,
403 415 path,
404 416 WithBasename::to_cow_owned,
405 417 |ancestor| {
406 418 // We can’t use `+= increment` because the counter is unsigned,
407 419 // and we want debug builds to detect accidental underflow
408 420 // through zero
409 421 match tracked_count_increment {
410 422 1 => ancestor.tracked_descendants_count += 1,
411 423 -1 => ancestor.tracked_descendants_count -= 1,
412 424 _ => {}
413 425 }
414 426 },
415 427 )?;
416 428 if node.entry.is_none() {
417 429 self.nodes_with_entry_count += 1
418 430 }
419 431 node.entry = Some(new_entry);
420 432 Ok(())
421 433 }
422 434
423 435 fn iter_nodes<'tree>(
424 436 &'tree self,
425 437 ) -> impl Iterator<
426 438 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
427 439 > + 'tree {
428 440 // Depth first tree traversal.
429 441 //
430 442 // If we could afford internal iteration and recursion,
431 443 // this would look like:
432 444 //
433 445 // ```
434 446 // fn traverse_children(
435 447 // children: &ChildNodes,
436 448 // each: &mut impl FnMut(&Node),
437 449 // ) {
438 450 // for child in children.values() {
439 451 // traverse_children(&child.children, each);
440 452 // each(child);
441 453 // }
442 454 // }
443 455 // ```
444 456 //
445 457 // However we want an external iterator and therefore can’t use the
446 458 // call stack. Use an explicit stack instead:
447 459 let mut stack = Vec::new();
448 460 let mut iter = self.root.as_ref().iter();
449 461 std::iter::from_fn(move || {
450 462 while let Some(child_node) = iter.next() {
451 let children = match child_node.children() {
463 let children = match child_node.children(self.on_disk) {
452 464 Ok(children) => children,
453 465 Err(error) => return Some(Err(error)),
454 466 };
455 467 // Pseudo-recursion
456 468 let new_iter = children.iter();
457 469 let old_iter = std::mem::replace(&mut iter, new_iter);
458 470 stack.push((child_node, old_iter));
459 471 }
460 472 // Found the end of a `children.iter()` iterator.
461 473 if let Some((child_node, next_iter)) = stack.pop() {
462 474 // "Return" from pseudo-recursion by restoring state from the
463 475 // explicit stack
464 476 iter = next_iter;
465 477
466 478 Some(Ok(child_node))
467 479 } else {
468 480 // Reached the bottom of the stack, we’re done
469 481 None
470 482 }
471 483 })
472 484 }
473 485
474 486 fn clear_known_ambiguous_mtimes(
475 487 &mut self,
476 488 paths: &[impl AsRef<HgPath>],
477 489 ) -> Result<(), DirstateV2ParseError> {
478 490 for path in paths {
479 if let Some(node) =
480 Self::get_node_mut(&mut self.root, path.as_ref())?
481 {
491 if let Some(node) = Self::get_node_mut(
492 self.on_disk,
493 &mut self.root,
494 path.as_ref(),
495 )? {
482 496 if let Some(entry) = node.entry.as_mut() {
483 497 entry.clear_mtime();
484 498 }
485 499 }
486 500 }
487 501 Ok(())
488 502 }
489 503
490 504 /// Return a faillilble iterator of full paths of nodes that have an
491 505 /// `entry` for which the given `predicate` returns true.
492 506 ///
493 507 /// Fallibility means that each iterator item is a `Result`, which may
494 508 /// indicate a parse error of the on-disk dirstate-v2 format. Such errors
495 509 /// should only happen if Mercurial is buggy or a repository is corrupted.
496 510 fn filter_full_paths<'tree>(
497 511 &'tree self,
498 512 predicate: impl Fn(&DirstateEntry) -> bool + 'tree,
499 513 ) -> impl Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + 'tree
500 514 {
501 515 filter_map_results(self.iter_nodes(), move |node| {
502 516 if let Some(entry) = node.entry()? {
503 517 if predicate(&entry) {
504 return Ok(Some(node.full_path()?));
518 return Ok(Some(node.full_path(self.on_disk)?));
505 519 }
506 520 }
507 521 Ok(None)
508 522 })
509 523 }
510 524 }
511 525
512 526 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
513 527 ///
514 528 /// The callback is only called for incoming `Ok` values. Errors are passed
515 529 /// through as-is. In order to let it use the `?` operator the callback is
516 530 /// expected to return a `Result` of `Option`, instead of an `Option` of
517 531 /// `Result`.
518 532 fn filter_map_results<'a, I, F, A, B, E>(
519 533 iter: I,
520 534 f: F,
521 535 ) -> impl Iterator<Item = Result<B, E>> + 'a
522 536 where
523 537 I: Iterator<Item = Result<A, E>> + 'a,
524 538 F: Fn(A) -> Result<Option<B>, E> + 'a,
525 539 {
526 540 iter.filter_map(move |result| match result {
527 541 Ok(node) => f(node).transpose(),
528 542 Err(e) => Some(Err(e)),
529 543 })
530 544 }
531 545
532 546 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
533 547 fn clear(&mut self) {
534 548 self.root = Default::default();
535 549 self.nodes_with_entry_count = 0;
536 550 self.nodes_with_copy_source_count = 0;
537 551 }
538 552
539 553 fn add_file(
540 554 &mut self,
541 555 filename: &HgPath,
542 556 old_state: EntryState,
543 557 entry: DirstateEntry,
544 558 ) -> Result<(), DirstateError> {
545 559 Ok(self.add_or_remove_file(filename, old_state, entry)?)
546 560 }
547 561
548 562 fn remove_file(
549 563 &mut self,
550 564 filename: &HgPath,
551 565 old_state: EntryState,
552 566 size: i32,
553 567 ) -> Result<(), DirstateError> {
554 568 let entry = DirstateEntry {
555 569 state: EntryState::Removed,
556 570 mode: 0,
557 571 size,
558 572 mtime: 0,
559 573 };
560 574 Ok(self.add_or_remove_file(filename, old_state, entry)?)
561 575 }
562 576
563 577 fn drop_file(
564 578 &mut self,
565 579 filename: &HgPath,
566 580 old_state: EntryState,
567 581 ) -> Result<bool, DirstateError> {
568 582 struct Dropped {
569 583 was_tracked: bool,
570 584 had_entry: bool,
571 585 had_copy_source: bool,
572 586 }
573 fn recur(
574 nodes: &mut ChildNodes,
587 fn recur<'on_disk>(
588 on_disk: &'on_disk [u8],
589 nodes: &mut ChildNodes<'on_disk>,
575 590 path: &HgPath,
576 591 ) -> Result<Option<Dropped>, DirstateV2ParseError> {
577 592 let (first_path_component, rest_of_path) =
578 593 path.split_first_component();
579 594 let node = if let Some(node) =
580 nodes.make_mut()?.get_mut(first_path_component)
595 nodes.make_mut(on_disk)?.get_mut(first_path_component)
581 596 {
582 597 node
583 598 } else {
584 599 return Ok(None);
585 600 };
586 601 let dropped;
587 602 if let Some(rest) = rest_of_path {
588 if let Some(d) = recur(&mut node.children, rest)? {
603 if let Some(d) = recur(on_disk, &mut node.children, rest)? {
589 604 dropped = d;
590 605 if dropped.was_tracked {
591 606 node.tracked_descendants_count -= 1;
592 607 }
593 608 } else {
594 609 return Ok(None);
595 610 }
596 611 } else {
597 612 dropped = Dropped {
598 613 was_tracked: node
599 614 .entry
600 615 .as_ref()
601 616 .map_or(false, |entry| entry.state.is_tracked()),
602 617 had_entry: node.entry.take().is_some(),
603 618 had_copy_source: node.copy_source.take().is_some(),
604 619 };
605 620 }
606 621 // After recursion, for both leaf (rest_of_path is None) nodes and
607 622 // parent nodes, remove a node if it just became empty.
608 623 if node.entry.is_none()
609 624 && node.copy_source.is_none()
610 625 && node.children.is_empty()
611 626 {
612 nodes.make_mut()?.remove(first_path_component);
627 nodes.make_mut(on_disk)?.remove(first_path_component);
613 628 }
614 629 Ok(Some(dropped))
615 630 }
616 631
617 if let Some(dropped) = recur(&mut self.root, filename)? {
632 if let Some(dropped) = recur(self.on_disk, &mut self.root, filename)? {
618 633 if dropped.had_entry {
619 634 self.nodes_with_entry_count -= 1
620 635 }
621 636 if dropped.had_copy_source {
622 637 self.nodes_with_copy_source_count -= 1
623 638 }
624 639 Ok(dropped.had_entry)
625 640 } else {
626 641 debug_assert!(!old_state.is_tracked());
627 642 Ok(false)
628 643 }
629 644 }
630 645
631 646 fn clear_ambiguous_times(
632 647 &mut self,
633 648 filenames: Vec<HgPathBuf>,
634 649 now: i32,
635 650 ) -> Result<(), DirstateV2ParseError> {
636 651 for filename in filenames {
637 if let Some(node) = Self::get_node_mut(&mut self.root, &filename)?
652 if let Some(node) =
653 Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
638 654 {
639 655 if let Some(entry) = node.entry.as_mut() {
640 656 entry.clear_ambiguous_mtime(now);
641 657 }
642 658 }
643 659 }
644 660 Ok(())
645 661 }
646 662
647 663 fn non_normal_entries_contains(
648 664 &mut self,
649 665 key: &HgPath,
650 666 ) -> Result<bool, DirstateV2ParseError> {
651 667 Ok(if let Some(node) = self.get_node(key)? {
652 668 node.entry()?.map_or(false, |entry| entry.is_non_normal())
653 669 } else {
654 670 false
655 671 })
656 672 }
657 673
658 674 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
659 675 // Do nothing, this `DirstateMap` does not have a separate "non normal
660 676 // entries" set that need to be kept up to date
661 677 }
662 678
663 679 fn non_normal_or_other_parent_paths(
664 680 &mut self,
665 681 ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>
666 682 {
667 683 Box::new(self.filter_full_paths(|entry| {
668 684 entry.is_non_normal() || entry.is_from_other_parent()
669 685 }))
670 686 }
671 687
672 688 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
673 689 // Do nothing, this `DirstateMap` does not have a separate "non normal
674 690 // entries" and "from other parent" sets that need to be recomputed
675 691 }
676 692
677 693 fn iter_non_normal_paths(
678 694 &mut self,
679 695 ) -> Box<
680 696 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
681 697 > {
682 698 self.iter_non_normal_paths_panic()
683 699 }
684 700
685 701 fn iter_non_normal_paths_panic(
686 702 &self,
687 703 ) -> Box<
688 704 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
689 705 > {
690 706 Box::new(self.filter_full_paths(|entry| entry.is_non_normal()))
691 707 }
692 708
693 709 fn iter_other_parent_paths(
694 710 &mut self,
695 711 ) -> Box<
696 712 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
697 713 > {
698 714 Box::new(self.filter_full_paths(|entry| entry.is_from_other_parent()))
699 715 }
700 716
701 717 fn has_tracked_dir(
702 718 &mut self,
703 719 directory: &HgPath,
704 720 ) -> Result<bool, DirstateError> {
705 721 if let Some(node) = self.get_node(directory)? {
706 722 // A node without a `DirstateEntry` was created to hold child
707 723 // nodes, and is therefore a directory.
708 724 Ok(!node.has_entry() && node.tracked_descendants_count() > 0)
709 725 } else {
710 726 Ok(false)
711 727 }
712 728 }
713 729
714 730 fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> {
715 731 if let Some(node) = self.get_node(directory)? {
716 732 // A node without a `DirstateEntry` was created to hold child
717 733 // nodes, and is therefore a directory.
718 734 Ok(!node.has_entry())
719 735 } else {
720 736 Ok(false)
721 737 }
722 738 }
723 739
724 740 #[timed]
725 741 fn pack_v1(
726 742 &mut self,
727 743 parents: DirstateParents,
728 744 now: Timestamp,
729 745 ) -> Result<Vec<u8>, DirstateError> {
730 746 let now: i32 = now.0.try_into().expect("time overflow");
731 747 let mut ambiguous_mtimes = Vec::new();
732 748 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
733 749 // reallocations
734 750 let mut size = parents.as_bytes().len();
735 751 for node in self.iter_nodes() {
736 752 let node = node?;
737 753 if let Some(entry) = node.entry()? {
738 size +=
739 packed_entry_size(node.full_path()?, node.copy_source()?);
754 size += packed_entry_size(
755 node.full_path(self.on_disk)?,
756 node.copy_source(self.on_disk)?,
757 );
740 758 if entry.mtime_is_ambiguous(now) {
741 ambiguous_mtimes.push(node.full_path_cow()?)
759 ambiguous_mtimes.push(node.full_path_cow(self.on_disk)?)
742 760 }
743 761 }
744 762 }
745 763 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
746 764
747 765 let mut packed = Vec::with_capacity(size);
748 766 packed.extend(parents.as_bytes());
749 767
750 768 for node in self.iter_nodes() {
751 769 let node = node?;
752 770 if let Some(entry) = node.entry()? {
753 771 pack_entry(
754 node.full_path()?,
772 node.full_path(self.on_disk)?,
755 773 &entry,
756 node.copy_source()?,
774 node.copy_source(self.on_disk)?,
757 775 &mut packed,
758 776 );
759 777 }
760 778 }
761 779 Ok(packed)
762 780 }
763 781
764 782 #[timed]
765 783 fn pack_v2(
766 784 &mut self,
767 785 parents: DirstateParents,
768 786 now: Timestamp,
769 787 ) -> Result<Vec<u8>, DirstateError> {
770 788 // TODO: how do we want to handle this in 2038?
771 789 let now: i32 = now.0.try_into().expect("time overflow");
772 790 let mut paths = Vec::new();
773 791 for node in self.iter_nodes() {
774 792 let node = node?;
775 793 if let Some(entry) = node.entry()? {
776 794 if entry.mtime_is_ambiguous(now) {
777 paths.push(node.full_path_cow()?)
795 paths.push(node.full_path_cow(self.on_disk)?)
778 796 }
779 797 }
780 798 }
781 799 // Borrow of `self` ends here since we collect cloned paths
782 800
783 801 self.clear_known_ambiguous_mtimes(&paths)?;
784 802
785 803 on_disk::write(self, parents)
786 804 }
787 805
788 806 fn set_all_dirs(&mut self) -> Result<(), DirstateError> {
789 807 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
790 808 // needs to be recomputed
791 809 Ok(())
792 810 }
793 811
794 812 fn set_dirs(&mut self) -> Result<(), DirstateError> {
795 813 // Do nothing, this `DirstateMap` does not a separate `dirs` that needs
796 814 // to be recomputed
797 815 Ok(())
798 816 }
799 817
800 818 fn status<'a>(
801 819 &'a mut self,
802 820 matcher: &'a (dyn Matcher + Sync),
803 821 root_dir: PathBuf,
804 822 ignore_files: Vec<PathBuf>,
805 823 options: StatusOptions,
806 824 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
807 825 {
808 826 super::status::status(self, matcher, root_dir, ignore_files, options)
809 827 }
810 828
811 829 fn copy_map_len(&self) -> usize {
812 830 self.nodes_with_copy_source_count as usize
813 831 }
814 832
815 833 fn copy_map_iter(&self) -> CopyMapIter<'_> {
816 Box::new(filter_map_results(self.iter_nodes(), |node| {
817 Ok(if let Some(source) = node.copy_source()? {
818 Some((node.full_path()?, source))
834 Box::new(filter_map_results(self.iter_nodes(), move |node| {
835 Ok(if let Some(source) = node.copy_source(self.on_disk)? {
836 Some((node.full_path(self.on_disk)?, source))
819 837 } else {
820 838 None
821 839 })
822 840 }))
823 841 }
824 842
825 843 fn copy_map_contains_key(
826 844 &self,
827 845 key: &HgPath,
828 846 ) -> Result<bool, DirstateV2ParseError> {
829 847 Ok(if let Some(node) = self.get_node(key)? {
830 848 node.has_copy_source()
831 849 } else {
832 850 false
833 851 })
834 852 }
835 853
836 854 fn copy_map_get(
837 855 &self,
838 856 key: &HgPath,
839 857 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
840 858 if let Some(node) = self.get_node(key)? {
841 if let Some(source) = node.copy_source()? {
859 if let Some(source) = node.copy_source(self.on_disk)? {
842 860 return Ok(Some(source));
843 861 }
844 862 }
845 863 Ok(None)
846 864 }
847 865
848 866 fn copy_map_remove(
849 867 &mut self,
850 868 key: &HgPath,
851 869 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
852 870 let count = &mut self.nodes_with_copy_source_count;
853 Ok(Self::get_node_mut(&mut self.root, key)?.and_then(|node| {
871 Ok(
872 Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then(
873 |node| {
854 874 if node.copy_source.is_some() {
855 875 *count -= 1
856 876 }
857 877 node.copy_source.take().map(Cow::into_owned)
858 }))
878 },
879 ),
880 )
859 881 }
860 882
861 883 fn copy_map_insert(
862 884 &mut self,
863 885 key: HgPathBuf,
864 886 value: HgPathBuf,
865 887 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
866 888 let node = Self::get_or_insert_node(
889 self.on_disk,
867 890 &mut self.root,
868 891 &key,
869 892 WithBasename::to_cow_owned,
870 893 |_ancestor| {},
871 894 )?;
872 895 if node.copy_source.is_none() {
873 896 self.nodes_with_copy_source_count += 1
874 897 }
875 898 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
876 899 }
877 900
878 901 fn len(&self) -> usize {
879 902 self.nodes_with_entry_count as usize
880 903 }
881 904
882 905 fn contains_key(
883 906 &self,
884 907 key: &HgPath,
885 908 ) -> Result<bool, DirstateV2ParseError> {
886 909 Ok(self.get(key)?.is_some())
887 910 }
888 911
889 912 fn get(
890 913 &self,
891 914 key: &HgPath,
892 915 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
893 916 Ok(if let Some(node) = self.get_node(key)? {
894 917 node.entry()?
895 918 } else {
896 919 None
897 920 })
898 921 }
899 922
900 923 fn iter(&self) -> StateMapIter<'_> {
901 Box::new(filter_map_results(self.iter_nodes(), |node| {
924 Box::new(filter_map_results(self.iter_nodes(), move |node| {
902 925 Ok(if let Some(entry) = node.entry()? {
903 Some((node.full_path()?, entry))
926 Some((node.full_path(self.on_disk)?, entry))
904 927 } else {
905 928 None
906 929 })
907 930 }))
908 931 }
909 932 }
@@ -1,360 +1,365 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! # File format
4 4 //!
5 5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 7 //! the nodes representing the files and directories at the root of the
8 8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 10 //! nodes (if any) for nested files and directories.
11 11
12 12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
13 13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 14 use crate::errors::HgError;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateParents;
19 19 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
20 20 use bytes_cast::BytesCast;
21 21 use std::borrow::Cow;
22 22 use std::convert::{TryFrom, TryInto};
23 23
24 24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 25 /// This a redundant sanity check more than an actual "magic number" since
26 26 /// `.hg/requires` already governs which format should be used.
27 27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28 28
29 29 #[derive(BytesCast)]
30 30 #[repr(C)]
31 31 struct Header {
32 32 marker: [u8; V2_FORMAT_MARKER.len()],
33 33
34 34 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
35 35 /// `parents` field being at this offset, immediately after `marker`.
36 36 parents: DirstateParents,
37 37
38 38 root: ChildNodes,
39 39 nodes_with_entry_count: Size,
40 40 nodes_with_copy_source_count: Size,
41 41 }
42 42
43 43 #[derive(BytesCast)]
44 44 #[repr(C)]
45 45 struct Node {
46 46 full_path: PathSlice,
47 47
48 48 /// In bytes from `self.full_path.start`
49 49 base_name_start: Size,
50 50
51 51 copy_source: OptPathSlice,
52 52 entry: OptEntry,
53 53 children: ChildNodes,
54 54 tracked_descendants_count: Size,
55 55 }
56 56
57 57 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
58 58 /// format
59 59 #[derive(BytesCast)]
60 60 #[repr(C)]
61 61 struct OptEntry {
62 62 state: u8,
63 63 mode: I32Be,
64 64 mtime: I32Be,
65 65 size: I32Be,
66 66 }
67 67
68 68 /// Counted in bytes from the start of the file
69 69 ///
70 70 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
71 71 /// we could save space by using `U32Be` instead.
72 72 type Offset = U64Be;
73 73
74 74 /// Counted in number of items
75 75 ///
76 76 /// NOTE: not supporting directories with more than 4 billion direct children,
77 77 /// or filenames more than 4 GiB.
78 78 type Size = U32Be;
79 79
80 80 /// Location of consecutive, fixed-size items.
81 81 ///
82 82 /// An item can be a single byte for paths, or a struct with
83 83 /// `derive(BytesCast)`.
84 84 #[derive(BytesCast, Copy, Clone)]
85 85 #[repr(C)]
86 86 struct Slice {
87 87 start: Offset,
88 88 len: Size,
89 89 }
90 90
91 91 /// A contiguous sequence of `len` times `Node`, representing the child nodes
92 92 /// of either some other node or of the repository root.
93 93 ///
94 94 /// Always sorted by ascending `full_path`, to allow binary search.
95 95 /// Since nodes with the same parent nodes also have the same parent path,
96 96 /// only the `base_name`s need to be compared during binary search.
97 97 type ChildNodes = Slice;
98 98
99 99 /// A `HgPath` of `len` bytes
100 100 type PathSlice = Slice;
101 101
102 102 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
103 103 type OptPathSlice = Slice;
104 104
105 105 /// Make sure that size-affecting changes are made knowingly
106 106 fn _static_assert_size_of() {
107 107 let _ = std::mem::transmute::<Header, [u8; 72]>;
108 108 let _ = std::mem::transmute::<Node, [u8; 57]>;
109 109 }
110 110
111 111 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
112 112 ///
113 113 /// This should only happen if Mercurial is buggy or a repository is corrupted.
114 114 #[derive(Debug)]
115 115 pub struct DirstateV2ParseError;
116 116
117 117 impl From<DirstateV2ParseError> for HgError {
118 118 fn from(_: DirstateV2ParseError) -> Self {
119 119 HgError::corrupted("dirstate-v2 parse error")
120 120 }
121 121 }
122 122
123 123 impl From<DirstateV2ParseError> for crate::DirstateError {
124 124 fn from(error: DirstateV2ParseError) -> Self {
125 125 HgError::from(error).into()
126 126 }
127 127 }
128 128
129 129 pub(super) fn read<'on_disk>(
130 130 on_disk: &'on_disk [u8],
131 131 ) -> Result<
132 132 (DirstateMap<'on_disk>, Option<DirstateParents>),
133 133 DirstateV2ParseError,
134 134 > {
135 135 if on_disk.is_empty() {
136 136 return Ok((DirstateMap::empty(on_disk), None));
137 137 }
138 138 let (header, _) =
139 139 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
140 140 let Header {
141 141 marker,
142 142 parents,
143 143 root,
144 144 nodes_with_entry_count,
145 145 nodes_with_copy_source_count,
146 146 } = header;
147 147 if marker != V2_FORMAT_MARKER {
148 148 return Err(DirstateV2ParseError);
149 149 }
150 150 let dirstate_map = DirstateMap {
151 151 on_disk,
152 152 root: read_nodes(on_disk, *root)?,
153 153 nodes_with_entry_count: nodes_with_entry_count.get(),
154 154 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
155 155 };
156 156 let parents = Some(parents.clone());
157 157 Ok((dirstate_map, parents))
158 158 }
159 159
160 160 impl Node {
161 161 pub(super) fn path<'on_disk>(
162 162 &self,
163 163 on_disk: &'on_disk [u8],
164 164 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
165 165 let full_path = read_hg_path(on_disk, self.full_path)?;
166 166 let base_name_start = usize::try_from(self.base_name_start.get())
167 167 // u32 -> usize, could only panic on a 16-bit CPU
168 168 .expect("dirstate-v2 base_name_start out of bounds");
169 169 if base_name_start < full_path.len() {
170 170 Ok(WithBasename::from_raw_parts(full_path, base_name_start))
171 171 } else {
172 172 Err(DirstateV2ParseError)
173 173 }
174 174 }
175 175
176 176 pub(super) fn copy_source<'on_disk>(
177 177 &self,
178 178 on_disk: &'on_disk [u8],
179 179 ) -> Result<Option<Cow<'on_disk, HgPath>>, DirstateV2ParseError> {
180 180 Ok(if self.copy_source.start.get() != 0 {
181 181 Some(read_hg_path(on_disk, self.copy_source)?)
182 182 } else {
183 183 None
184 184 })
185 185 }
186 186
187 187 pub(super) fn entry(
188 188 &self,
189 189 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
190 190 Ok(if self.entry.state != b'\0' {
191 191 Some(DirstateEntry {
192 192 state: self
193 193 .entry
194 194 .state
195 195 .try_into()
196 196 .map_err(|_| DirstateV2ParseError)?,
197 197 mode: self.entry.mode.get(),
198 198 mtime: self.entry.mtime.get(),
199 199 size: self.entry.size.get(),
200 200 })
201 201 } else {
202 202 None
203 203 })
204 204 }
205 205
206 206 pub(super) fn to_in_memory_node<'on_disk>(
207 207 &self,
208 208 on_disk: &'on_disk [u8],
209 209 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
210 210 Ok(dirstate_map::Node {
211 211 children: read_nodes(on_disk, self.children)?,
212 212 copy_source: self.copy_source(on_disk)?,
213 213 entry: self.entry()?,
214 214 tracked_descendants_count: self.tracked_descendants_count.get(),
215 215 })
216 216 }
217 217 }
218 218
219 219 fn read_nodes(
220 220 on_disk: &[u8],
221 221 slice: ChildNodes,
222 222 ) -> Result<dirstate_map::ChildNodes, DirstateV2ParseError> {
223 223 read_slice::<Node>(on_disk, slice)?
224 224 .iter()
225 225 .map(|node| {
226 226 Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?))
227 227 })
228 228 .collect::<Result<_, _>>()
229 229 .map(dirstate_map::ChildNodes::InMemory)
230 230 }
231 231
232 232 fn read_hg_path(
233 233 on_disk: &[u8],
234 234 slice: Slice,
235 235 ) -> Result<Cow<HgPath>, DirstateV2ParseError> {
236 236 let bytes = read_slice::<u8>(on_disk, slice)?;
237 237 Ok(Cow::Borrowed(HgPath::new(bytes)))
238 238 }
239 239
240 240 fn read_slice<T>(
241 241 on_disk: &[u8],
242 242 slice: Slice,
243 243 ) -> Result<&[T], DirstateV2ParseError>
244 244 where
245 245 T: BytesCast,
246 246 {
247 247 // Either `usize::MAX` would result in "out of bounds" error since a single
248 248 // `&[u8]` cannot occupy the entire addess space.
249 249 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
250 250 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
251 251 on_disk
252 252 .get(start..)
253 253 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
254 254 .map(|(slice, _rest)| slice)
255 255 .ok_or_else(|| DirstateV2ParseError)
256 256 }
257 257
258 258 pub(super) fn write(
259 259 dirstate_map: &mut DirstateMap,
260 260 parents: DirstateParents,
261 261 ) -> Result<Vec<u8>, DirstateError> {
262 262 let header_len = std::mem::size_of::<Header>();
263 263
264 264 // This ignores the space for paths, and for nodes without an entry.
265 265 // TODO: better estimate? Skip the `Vec` and write to a file directly?
266 266 let size_guess = header_len
267 267 + std::mem::size_of::<Node>()
268 268 * dirstate_map.nodes_with_entry_count as usize;
269 269 let mut out = Vec::with_capacity(size_guess);
270 270
271 271 // Keep space for the header. We’ll fill it out at the end when we know the
272 272 // actual offset for the root nodes.
273 273 out.resize(header_len, 0_u8);
274 274
275 let root = write_nodes(dirstate_map.root.as_ref(), &mut out)?;
275 let root =
276 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
276 277
277 278 let header = Header {
278 279 marker: *V2_FORMAT_MARKER,
279 280 parents: parents,
280 281 root,
281 282 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
282 283 nodes_with_copy_source_count: dirstate_map
283 284 .nodes_with_copy_source_count
284 285 .into(),
285 286 };
286 287 out[..header_len].copy_from_slice(header.as_bytes());
287 288 Ok(out)
288 289 }
289 290
290 291 fn write_nodes(
292 dirstate_map: &DirstateMap,
291 293 nodes: dirstate_map::ChildNodesRef,
292 294 out: &mut Vec<u8>,
293 295 ) -> Result<ChildNodes, DirstateError> {
294 296 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
295 297 // order. Sort to enable binary search in the written file.
296 298 let nodes = nodes.sorted();
297 299
298 300 // First accumulate serialized nodes in a `Vec`
299 301 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
300 302 for node in nodes {
301 let children = write_nodes(node.children()?, out)?;
302 let full_path = write_slice::<u8>(node.full_path()?.as_bytes(), out);
303 let copy_source = if let Some(source) = node.copy_source()? {
303 let children = node.children(dirstate_map.on_disk)?;
304 let children = write_nodes(dirstate_map, children, out)?;
305 let full_path = node.full_path(dirstate_map.on_disk)?;
306 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
307 let copy_source =
308 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
304 309 write_slice::<u8>(source.as_bytes(), out)
305 310 } else {
306 311 Slice {
307 312 start: 0.into(),
308 313 len: 0.into(),
309 314 }
310 315 };
311 316 on_disk_nodes.push(match node {
312 317 NodeRef::InMemory(path, node) => Node {
313 318 children,
314 319 copy_source,
315 320 full_path,
316 321 base_name_start: u32::try_from(path.base_name_start())
317 322 // Could only panic for paths over 4 GiB
318 323 .expect("dirstate-v2 offset overflow")
319 324 .into(),
320 325 tracked_descendants_count: node
321 326 .tracked_descendants_count
322 327 .into(),
323 328 entry: if let Some(entry) = &node.entry {
324 329 OptEntry {
325 330 state: entry.state.into(),
326 331 mode: entry.mode.into(),
327 332 mtime: entry.mtime.into(),
328 333 size: entry.size.into(),
329 334 }
330 335 } else {
331 336 OptEntry {
332 337 state: b'\0',
333 338 mode: 0.into(),
334 339 mtime: 0.into(),
335 340 size: 0.into(),
336 341 }
337 342 },
338 343 },
339 344 })
340 345 }
341 346 // … so we can write them contiguously
342 347 Ok(write_slice::<Node>(&on_disk_nodes, out))
343 348 }
344 349
345 350 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
346 351 where
347 352 T: BytesCast,
348 353 {
349 354 let start = u64::try_from(out.len())
350 355 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
351 356 .expect("dirstate-v2 offset overflow")
352 357 .into();
353 358 let len = u32::try_from(slice.len())
354 359 // Could only panic for paths over 4 GiB or nodes with over 4 billions
355 360 // child nodes
356 361 .expect("dirstate-v2 offset overflow")
357 362 .into();
358 363 out.extend(slice.as_bytes());
359 364 Slice { start, len }
360 365 }
@@ -1,427 +1,433 b''
1 1 use crate::dirstate::status::IgnoreFnType;
2 2 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
3 3 use crate::dirstate_tree::dirstate_map::DirstateMap;
4 4 use crate::dirstate_tree::dirstate_map::NodeRef;
5 5 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
6 6 use crate::matchers::get_ignore_function;
7 7 use crate::matchers::Matcher;
8 8 use crate::utils::files::get_bytes_from_os_string;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::BadMatch;
11 11 use crate::DirstateStatus;
12 12 use crate::EntryState;
13 13 use crate::HgPathBuf;
14 14 use crate::PatternFileWarning;
15 15 use crate::StatusError;
16 16 use crate::StatusOptions;
17 17 use micro_timer::timed;
18 18 use rayon::prelude::*;
19 19 use std::borrow::Cow;
20 20 use std::io;
21 21 use std::path::Path;
22 22 use std::path::PathBuf;
23 23 use std::sync::Mutex;
24 24
25 25 /// Returns the status of the working directory compared to its parent
26 26 /// changeset.
27 27 ///
28 28 /// This algorithm is based on traversing the filesystem tree (`fs` in function
29 29 /// and variable names) and dirstate tree at the same time. The core of this
30 30 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
31 31 /// and its use of `itertools::merge_join_by`. When reaching a path that only
32 32 /// exists in one of the two trees, depending on information requested by
33 33 /// `options` we may need to traverse the remaining subtree.
34 34 #[timed]
35 pub fn status<'tree>(
36 dmap: &'tree mut DirstateMap,
35 pub fn status<'tree, 'on_disk: 'tree>(
36 dmap: &'tree mut DirstateMap<'on_disk>,
37 37 matcher: &(dyn Matcher + Sync),
38 38 root_dir: PathBuf,
39 39 ignore_files: Vec<PathBuf>,
40 40 options: StatusOptions,
41 41 ) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
42 42 let (ignore_fn, warnings): (IgnoreFnType, _) =
43 43 if options.list_ignored || options.list_unknown {
44 44 get_ignore_function(ignore_files, &root_dir)?
45 45 } else {
46 46 (Box::new(|&_| true), vec![])
47 47 };
48 48
49 49 let common = StatusCommon {
50 dmap,
50 51 options,
51 52 matcher,
52 53 ignore_fn,
53 54 outcome: Mutex::new(DirstateStatus::default()),
54 55 };
55 56 let is_at_repo_root = true;
56 57 let hg_path = HgPath::new("");
57 58 let has_ignored_ancestor = false;
58 59 common.traverse_fs_directory_and_dirstate(
59 60 has_ignored_ancestor,
60 61 dmap.root.as_ref(),
61 62 hg_path,
62 63 &root_dir,
63 64 is_at_repo_root,
64 65 )?;
65 66 Ok((common.outcome.into_inner().unwrap(), warnings))
66 67 }
67 68
68 69 /// Bag of random things needed by various parts of the algorithm. Reduces the
69 70 /// number of parameters passed to functions.
70 struct StatusCommon<'tree, 'a> {
71 struct StatusCommon<'tree, 'a, 'on_disk: 'tree> {
72 dmap: &'tree DirstateMap<'on_disk>,
71 73 options: StatusOptions,
72 74 matcher: &'a (dyn Matcher + Sync),
73 75 ignore_fn: IgnoreFnType<'a>,
74 76 outcome: Mutex<DirstateStatus<'tree>>,
75 77 }
76 78
77 impl<'tree, 'a> StatusCommon<'tree, 'a> {
79 impl<'tree, 'a> StatusCommon<'tree, 'a, '_> {
78 80 fn read_dir(
79 81 &self,
80 82 hg_path: &HgPath,
81 83 fs_path: &Path,
82 84 is_at_repo_root: bool,
83 85 ) -> Result<Vec<DirEntry>, ()> {
84 86 DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
85 87 let errno = error.raw_os_error().expect("expected real OS error");
86 88 self.outcome
87 89 .lock()
88 90 .unwrap()
89 91 .bad
90 92 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
91 93 })
92 94 }
93 95
94 96 fn traverse_fs_directory_and_dirstate(
95 97 &self,
96 98 has_ignored_ancestor: bool,
97 99 dirstate_nodes: ChildNodesRef<'tree, '_>,
98 100 directory_hg_path: &'tree HgPath,
99 101 directory_fs_path: &Path,
100 102 is_at_repo_root: bool,
101 103 ) -> Result<(), DirstateV2ParseError> {
102 104 let mut fs_entries = if let Ok(entries) = self.read_dir(
103 105 directory_hg_path,
104 106 directory_fs_path,
105 107 is_at_repo_root,
106 108 ) {
107 109 entries
108 110 } else {
109 111 return Ok(());
110 112 };
111 113
112 114 // `merge_join_by` requires both its input iterators to be sorted:
113 115
114 116 let dirstate_nodes = dirstate_nodes.sorted();
115 117 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
116 118 // https://github.com/rust-lang/rust/issues/34162
117 119 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
118 120
119 121 // Propagate here any error that would happen inside the comparison
120 122 // callback below
121 123 for dirstate_node in &dirstate_nodes {
122 dirstate_node.base_name()?;
124 dirstate_node.base_name(self.dmap.on_disk)?;
123 125 }
124 126 itertools::merge_join_by(
125 127 dirstate_nodes,
126 128 &fs_entries,
127 129 |dirstate_node, fs_entry| {
128 130 // This `unwrap` never panics because we already propagated
129 131 // those errors above
130 dirstate_node.base_name().unwrap().cmp(&fs_entry.base_name)
132 dirstate_node
133 .base_name(self.dmap.on_disk)
134 .unwrap()
135 .cmp(&fs_entry.base_name)
131 136 },
132 137 )
133 138 .par_bridge()
134 139 .map(|pair| {
135 140 use itertools::EitherOrBoth::*;
136 141 match pair {
137 142 Both(dirstate_node, fs_entry) => self
138 143 .traverse_fs_and_dirstate(
139 144 fs_entry,
140 145 dirstate_node,
141 146 has_ignored_ancestor,
142 147 ),
143 148 Left(dirstate_node) => {
144 149 self.traverse_dirstate_only(dirstate_node)
145 150 }
146 151 Right(fs_entry) => Ok(self.traverse_fs_only(
147 152 has_ignored_ancestor,
148 153 directory_hg_path,
149 154 fs_entry,
150 155 )),
151 156 }
152 157 })
153 158 .collect()
154 159 }
155 160
156 161 fn traverse_fs_and_dirstate(
157 162 &self,
158 163 fs_entry: &DirEntry,
159 164 dirstate_node: NodeRef<'tree, '_>,
160 165 has_ignored_ancestor: bool,
161 166 ) -> Result<(), DirstateV2ParseError> {
162 let hg_path = dirstate_node.full_path()?;
167 let hg_path = dirstate_node.full_path(self.dmap.on_disk)?;
163 168 let file_type = fs_entry.metadata.file_type();
164 169 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
165 170 if !file_or_symlink {
166 171 // If we previously had a file here, it was removed (with
167 172 // `hg rm` or similar) or deleted before it could be
168 173 // replaced by a directory or something else.
169 174 self.mark_removed_or_deleted_if_file(
170 175 hg_path,
171 176 dirstate_node.state()?,
172 177 );
173 178 }
174 179 if file_type.is_dir() {
175 180 if self.options.collect_traversed_dirs {
176 181 self.outcome.lock().unwrap().traversed.push(hg_path.into())
177 182 }
178 183 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
179 184 let is_at_repo_root = false;
180 185 self.traverse_fs_directory_and_dirstate(
181 186 is_ignored,
182 dirstate_node.children()?,
187 dirstate_node.children(self.dmap.on_disk)?,
183 188 hg_path,
184 189 &fs_entry.full_path,
185 190 is_at_repo_root,
186 191 )?
187 192 } else {
188 193 if file_or_symlink && self.matcher.matches(hg_path) {
189 194 let full_path = Cow::from(hg_path);
190 195 if let Some(state) = dirstate_node.state()? {
191 196 match state {
192 197 EntryState::Added => {
193 198 self.outcome.lock().unwrap().added.push(full_path)
194 199 }
195 200 EntryState::Removed => self
196 201 .outcome
197 202 .lock()
198 203 .unwrap()
199 204 .removed
200 205 .push(full_path),
201 206 EntryState::Merged => self
202 207 .outcome
203 208 .lock()
204 209 .unwrap()
205 210 .modified
206 211 .push(full_path),
207 212 EntryState::Normal => {
208 213 self.handle_normal_file(&dirstate_node, fs_entry)?
209 214 }
210 215 // This variant is not used in DirstateMap
211 216 // nodes
212 217 EntryState::Unknown => unreachable!(),
213 218 }
214 219 } else {
215 220 // `node.entry.is_none()` indicates a "directory"
216 221 // node, but the filesystem has a file
217 222 self.mark_unknown_or_ignored(
218 223 has_ignored_ancestor,
219 224 full_path,
220 225 )
221 226 }
222 227 }
223 228
224 for child_node in dirstate_node.children()?.iter() {
229 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
230 {
225 231 self.traverse_dirstate_only(child_node)?
226 232 }
227 233 }
228 234 Ok(())
229 235 }
230 236
231 237 /// A file with `EntryState::Normal` in the dirstate was found in the
232 238 /// filesystem
233 239 fn handle_normal_file(
234 240 &self,
235 241 dirstate_node: &NodeRef<'tree, '_>,
236 242 fs_entry: &DirEntry,
237 243 ) -> Result<(), DirstateV2ParseError> {
238 244 // Keep the low 31 bits
239 245 fn truncate_u64(value: u64) -> i32 {
240 246 (value & 0x7FFF_FFFF) as i32
241 247 }
242 248 fn truncate_i64(value: i64) -> i32 {
243 249 (value & 0x7FFF_FFFF) as i32
244 250 }
245 251
246 252 let entry = dirstate_node
247 253 .entry()?
248 254 .expect("handle_normal_file called with entry-less node");
249 let full_path = Cow::from(dirstate_node.full_path()?);
255 let full_path = Cow::from(dirstate_node.full_path(self.dmap.on_disk)?);
250 256 let mode_changed = || {
251 257 self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
252 258 };
253 259 let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
254 260 if entry.size >= 0
255 261 && size_changed
256 262 && fs_entry.metadata.file_type().is_symlink()
257 263 {
258 264 // issue6456: Size returned may be longer due to encryption
259 265 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
260 266 self.outcome.lock().unwrap().unsure.push(full_path)
261 267 } else if dirstate_node.has_copy_source()
262 268 || entry.is_from_other_parent()
263 269 || (entry.size >= 0 && (size_changed || mode_changed()))
264 270 {
265 271 self.outcome.lock().unwrap().modified.push(full_path)
266 272 } else {
267 273 let mtime = mtime_seconds(&fs_entry.metadata);
268 274 if truncate_i64(mtime) != entry.mtime
269 275 || mtime == self.options.last_normal_time
270 276 {
271 277 self.outcome.lock().unwrap().unsure.push(full_path)
272 278 } else if self.options.list_clean {
273 279 self.outcome.lock().unwrap().clean.push(full_path)
274 280 }
275 281 }
276 282 Ok(())
277 283 }
278 284
279 285 /// A node in the dirstate tree has no corresponding filesystem entry
280 286 fn traverse_dirstate_only(
281 287 &self,
282 288 dirstate_node: NodeRef<'tree, '_>,
283 289 ) -> Result<(), DirstateV2ParseError> {
284 290 self.mark_removed_or_deleted_if_file(
285 dirstate_node.full_path()?,
291 dirstate_node.full_path(self.dmap.on_disk)?,
286 292 dirstate_node.state()?,
287 293 );
288 294 dirstate_node
289 .children()?
295 .children(self.dmap.on_disk)?
290 296 .par_iter()
291 297 .map(|child_node| self.traverse_dirstate_only(child_node))
292 298 .collect()
293 299 }
294 300
295 301 /// A node in the dirstate tree has no corresponding *file* on the
296 302 /// filesystem
297 303 ///
298 304 /// Does nothing on a "directory" node
299 305 fn mark_removed_or_deleted_if_file(
300 306 &self,
301 307 hg_path: &'tree HgPath,
302 308 dirstate_node_state: Option<EntryState>,
303 309 ) {
304 310 if let Some(state) = dirstate_node_state {
305 311 if self.matcher.matches(hg_path) {
306 312 if let EntryState::Removed = state {
307 313 self.outcome.lock().unwrap().removed.push(hg_path.into())
308 314 } else {
309 315 self.outcome.lock().unwrap().deleted.push(hg_path.into())
310 316 }
311 317 }
312 318 }
313 319 }
314 320
315 321 /// Something in the filesystem has no corresponding dirstate node
316 322 fn traverse_fs_only(
317 323 &self,
318 324 has_ignored_ancestor: bool,
319 325 directory_hg_path: &HgPath,
320 326 fs_entry: &DirEntry,
321 327 ) {
322 328 let hg_path = directory_hg_path.join(&fs_entry.base_name);
323 329 let file_type = fs_entry.metadata.file_type();
324 330 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
325 331 if file_type.is_dir() {
326 332 let is_ignored =
327 333 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
328 334 let traverse_children = if is_ignored {
329 335 // Descendants of an ignored directory are all ignored
330 336 self.options.list_ignored
331 337 } else {
332 338 // Descendants of an unknown directory may be either unknown or
333 339 // ignored
334 340 self.options.list_unknown || self.options.list_ignored
335 341 };
336 342 if traverse_children {
337 343 let is_at_repo_root = false;
338 344 if let Ok(children_fs_entries) = self.read_dir(
339 345 &hg_path,
340 346 &fs_entry.full_path,
341 347 is_at_repo_root,
342 348 ) {
343 349 children_fs_entries.par_iter().for_each(|child_fs_entry| {
344 350 self.traverse_fs_only(
345 351 is_ignored,
346 352 &hg_path,
347 353 child_fs_entry,
348 354 )
349 355 })
350 356 }
351 357 }
352 358 if self.options.collect_traversed_dirs {
353 359 self.outcome.lock().unwrap().traversed.push(hg_path.into())
354 360 }
355 361 } else if file_or_symlink && self.matcher.matches(&hg_path) {
356 362 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
357 363 }
358 364 }
359 365
360 366 fn mark_unknown_or_ignored(
361 367 &self,
362 368 has_ignored_ancestor: bool,
363 369 hg_path: Cow<'tree, HgPath>,
364 370 ) {
365 371 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
366 372 if is_ignored {
367 373 if self.options.list_ignored {
368 374 self.outcome.lock().unwrap().ignored.push(hg_path)
369 375 }
370 376 } else {
371 377 if self.options.list_unknown {
372 378 self.outcome.lock().unwrap().unknown.push(hg_path)
373 379 }
374 380 }
375 381 }
376 382 }
377 383
378 384 #[cfg(unix)] // TODO
379 385 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
380 386 // Going through `Metadata::modified()` would be portable, but would take
381 387 // care to construct a `SystemTime` value with sub-second precision just
382 388 // for us to throw that away here.
383 389 use std::os::unix::fs::MetadataExt;
384 390 metadata.mtime()
385 391 }
386 392
387 393 struct DirEntry {
388 394 base_name: HgPathBuf,
389 395 full_path: PathBuf,
390 396 metadata: std::fs::Metadata,
391 397 }
392 398
393 399 impl DirEntry {
394 400 /// Returns **unsorted** entries in the given directory, with name and
395 401 /// metadata.
396 402 ///
397 403 /// If a `.hg` sub-directory is encountered:
398 404 ///
399 405 /// * At the repository root, ignore that sub-directory
400 406 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
401 407 /// list instead.
402 408 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
403 409 let mut results = Vec::new();
404 410 for entry in path.read_dir()? {
405 411 let entry = entry?;
406 412 let metadata = entry.metadata()?;
407 413 let name = get_bytes_from_os_string(entry.file_name());
408 414 // FIXME don't do this when cached
409 415 if name == b".hg" {
410 416 if is_at_repo_root {
411 417 // Skip the repo’s own .hg (might be a symlink)
412 418 continue;
413 419 } else if metadata.is_dir() {
414 420 // A .hg sub-directory at another location means a subrepo,
415 421 // skip it entirely.
416 422 return Ok(Vec::new());
417 423 }
418 424 }
419 425 results.push(DirEntry {
420 426 base_name: name.into(),
421 427 full_path: entry.path(),
422 428 metadata,
423 429 })
424 430 }
425 431 Ok(results)
426 432 }
427 433 }
General Comments 0
You need to be logged in to leave comments. Login now