##// END OF EJS Templates
dirstate-v2: Add a zero-size error type for dirstate v2 parse errors...
Simon Sapin -
r48125:18b3060f default
parent child Browse files
Show More
@@ -1,773 +1,773 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::convert::TryInto;
5 5 use std::path::PathBuf;
6 6
7 7 use super::on_disk;
8 8 use super::path_with_basename::WithBasename;
9 9 use crate::dirstate::parsers::pack_entry;
10 10 use crate::dirstate::parsers::packed_entry_size;
11 11 use crate::dirstate::parsers::parse_dirstate_entries;
12 12 use crate::dirstate::parsers::Timestamp;
13 13 use crate::matchers::Matcher;
14 14 use crate::utils::hg_path::{HgPath, HgPathBuf};
15 15 use crate::CopyMapIter;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateMapError;
19 19 use crate::DirstateParents;
20 20 use crate::DirstateStatus;
21 21 use crate::EntryState;
22 22 use crate::FastHashMap;
23 23 use crate::PatternFileWarning;
24 24 use crate::StateMapIter;
25 25 use crate::StatusError;
26 26 use crate::StatusOptions;
27 27
28 28 pub struct DirstateMap<'on_disk> {
29 29 /// Contents of the `.hg/dirstate` file
30 30 pub(super) on_disk: &'on_disk [u8],
31 31
32 32 pub(super) root: ChildNodes<'on_disk>,
33 33
34 34 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
35 35 pub(super) nodes_with_entry_count: u32,
36 36
37 37 /// Number of nodes anywhere in the tree that have
38 38 /// `.copy_source.is_some()`.
39 39 pub(super) nodes_with_copy_source_count: u32,
40 40 }
41 41
42 42 /// Using a plain `HgPathBuf` of the full path from the repository root as a
43 43 /// map key would also work: all paths in a given map have the same parent
44 44 /// path, so comparing full paths gives the same result as comparing base
45 45 /// names. However `HashMap` would waste time always re-hashing the same
46 46 /// string prefix.
47 47 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
48 48
49 49 pub(super) enum ChildNodes<'on_disk> {
50 50 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
51 51 }
52 52
53 53 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
54 54 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
55 55 }
56 56
57 57 pub(super) enum NodeRef<'tree, 'on_disk> {
58 58 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
59 59 }
60 60
61 61 impl Default for ChildNodes<'_> {
62 62 fn default() -> Self {
63 63 ChildNodes::InMemory(Default::default())
64 64 }
65 65 }
66 66
67 67 impl<'on_disk> ChildNodes<'on_disk> {
68 68 pub(super) fn as_ref<'tree>(
69 69 &'tree self,
70 70 ) -> ChildNodesRef<'tree, 'on_disk> {
71 71 match self {
72 72 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
73 73 }
74 74 }
75 75
76 76 pub(super) fn is_empty(&self) -> bool {
77 77 match self {
78 78 ChildNodes::InMemory(nodes) => nodes.is_empty(),
79 79 }
80 80 }
81 81
82 82 pub(super) fn make_mut(
83 83 &mut self,
84 84 ) -> &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>> {
85 85 match self {
86 86 ChildNodes::InMemory(nodes) => nodes,
87 87 }
88 88 }
89 89 }
90 90
91 91 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
92 92 pub(super) fn get(
93 93 &self,
94 94 base_name: &HgPath,
95 95 ) -> Option<NodeRef<'tree, 'on_disk>> {
96 96 match self {
97 97 ChildNodesRef::InMemory(nodes) => nodes
98 98 .get_key_value(base_name)
99 99 .map(|(k, v)| NodeRef::InMemory(k, v)),
100 100 }
101 101 }
102 102
103 103 /// Iterate in undefined order
104 104 pub(super) fn iter(
105 105 &self,
106 106 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
107 107 match self {
108 108 ChildNodesRef::InMemory(nodes) => {
109 109 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v))
110 110 }
111 111 }
112 112 }
113 113
114 114 /// Iterate in parallel in undefined order
115 115 pub(super) fn par_iter(
116 116 &self,
117 117 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
118 118 {
119 119 use rayon::prelude::*;
120 120 match self {
121 121 ChildNodesRef::InMemory(nodes) => {
122 122 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v))
123 123 }
124 124 }
125 125 }
126 126
127 127 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
128 128 match self {
129 129 ChildNodesRef::InMemory(nodes) => {
130 130 let mut vec: Vec<_> = nodes
131 131 .iter()
132 132 .map(|(k, v)| NodeRef::InMemory(k, v))
133 133 .collect();
134 134 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
135 135 // value: https://github.com/rust-lang/rust/issues/34162
136 136 vec.sort_unstable_by(|a, b| a.base_name().cmp(b.base_name()));
137 137 vec
138 138 }
139 139 }
140 140 }
141 141 }
142 142
143 143 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
144 144 pub(super) fn full_path(&self) -> &'tree HgPath {
145 145 match self {
146 146 NodeRef::InMemory(path, _node) => path.full_path(),
147 147 }
148 148 }
149 149
150 150 /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree
151 151 pub(super) fn full_path_cow(&self) -> Cow<'on_disk, HgPath> {
152 152 match self {
153 153 NodeRef::InMemory(path, _node) => path.full_path().clone(),
154 154 }
155 155 }
156 156
157 157 pub(super) fn base_name(&self) -> &'tree HgPath {
158 158 match self {
159 159 NodeRef::InMemory(path, _node) => path.base_name(),
160 160 }
161 161 }
162 162
163 163 pub(super) fn children(&self) -> ChildNodesRef<'tree, 'on_disk> {
164 164 match self {
165 165 NodeRef::InMemory(_path, node) => node.children.as_ref(),
166 166 }
167 167 }
168 168
169 169 pub(super) fn copy_source(&self) -> Option<&'tree HgPath> {
170 170 match self {
171 171 NodeRef::InMemory(_path, node) => {
172 172 node.copy_source.as_ref().map(|s| &**s)
173 173 }
174 174 }
175 175 }
176 176
177 177 pub(super) fn has_entry(&self) -> bool {
178 178 match self {
179 179 NodeRef::InMemory(_path, node) => node.entry.is_some(),
180 180 }
181 181 }
182 182
183 183 pub(super) fn entry(&self) -> Option<DirstateEntry> {
184 184 match self {
185 185 NodeRef::InMemory(_path, node) => node.entry,
186 186 }
187 187 }
188 188 pub(super) fn state(&self) -> Option<EntryState> {
189 189 match self {
190 190 NodeRef::InMemory(_path, node) => {
191 191 node.entry.as_ref().map(|entry| entry.state)
192 192 }
193 193 }
194 194 }
195 195
196 196 pub(super) fn tracked_descendants_count(&self) -> u32 {
197 197 match self {
198 198 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
199 199 }
200 200 }
201 201 }
202 202
203 203 /// Represents a file or a directory
204 204 #[derive(Default)]
205 205 pub(super) struct Node<'on_disk> {
206 206 /// `None` for directories
207 207 pub(super) entry: Option<DirstateEntry>,
208 208
209 209 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
210 210
211 211 pub(super) children: ChildNodes<'on_disk>,
212 212
213 213 /// How many (non-inclusive) descendants of this node are tracked files
214 214 pub(super) tracked_descendants_count: u32,
215 215 }
216 216
217 217 impl<'on_disk> DirstateMap<'on_disk> {
218 218 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
219 219 Self {
220 220 on_disk,
221 221 root: ChildNodes::default(),
222 222 nodes_with_entry_count: 0,
223 223 nodes_with_copy_source_count: 0,
224 224 }
225 225 }
226 226
227 227 #[timed]
228 228 pub fn new_v2(
229 229 on_disk: &'on_disk [u8],
230 230 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
231 on_disk::read(on_disk)
231 Ok(on_disk::read(on_disk)?)
232 232 }
233 233
234 234 #[timed]
235 235 pub fn new_v1(
236 236 on_disk: &'on_disk [u8],
237 237 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
238 238 let mut map = Self::empty(on_disk);
239 239 if map.on_disk.is_empty() {
240 240 return Ok((map, None));
241 241 }
242 242
243 243 let parents = parse_dirstate_entries(
244 244 map.on_disk,
245 245 |path, entry, copy_source| {
246 246 let tracked = entry.state.is_tracked();
247 247 let node = Self::get_or_insert_node(
248 248 &mut map.root,
249 249 path,
250 250 WithBasename::to_cow_borrowed,
251 251 |ancestor| {
252 252 if tracked {
253 253 ancestor.tracked_descendants_count += 1
254 254 }
255 255 },
256 256 );
257 257 assert!(
258 258 node.entry.is_none(),
259 259 "duplicate dirstate entry in read"
260 260 );
261 261 assert!(
262 262 node.copy_source.is_none(),
263 263 "duplicate dirstate entry in read"
264 264 );
265 265 node.entry = Some(*entry);
266 266 node.copy_source = copy_source.map(Cow::Borrowed);
267 267 map.nodes_with_entry_count += 1;
268 268 if copy_source.is_some() {
269 269 map.nodes_with_copy_source_count += 1
270 270 }
271 271 },
272 272 )?;
273 273 let parents = Some(parents.clone());
274 274
275 275 Ok((map, parents))
276 276 }
277 277
278 278 fn get_node<'tree>(
279 279 &'tree self,
280 280 path: &HgPath,
281 281 ) -> Option<NodeRef<'tree, 'on_disk>> {
282 282 let mut children = self.root.as_ref();
283 283 let mut components = path.components();
284 284 let mut component =
285 285 components.next().expect("expected at least one components");
286 286 loop {
287 287 let child = children.get(component)?;
288 288 if let Some(next_component) = components.next() {
289 289 component = next_component;
290 290 children = child.children();
291 291 } else {
292 292 return Some(child);
293 293 }
294 294 }
295 295 }
296 296
297 297 /// Returns a mutable reference to the node at `path` if it exists
298 298 ///
299 299 /// This takes `root` instead of `&mut self` so that callers can mutate
300 300 /// other fields while the returned borrow is still valid
301 301 fn get_node_mut<'tree>(
302 302 root: &'tree mut ChildNodes<'on_disk>,
303 303 path: &HgPath,
304 304 ) -> Option<&'tree mut Node<'on_disk>> {
305 305 let mut children = root;
306 306 let mut components = path.components();
307 307 let mut component =
308 308 components.next().expect("expected at least one components");
309 309 loop {
310 310 let child = children.make_mut().get_mut(component)?;
311 311 if let Some(next_component) = components.next() {
312 312 component = next_component;
313 313 children = &mut child.children;
314 314 } else {
315 315 return Some(child);
316 316 }
317 317 }
318 318 }
319 319
320 320 fn get_or_insert_node<'tree, 'path>(
321 321 root: &'tree mut ChildNodes<'on_disk>,
322 322 path: &'path HgPath,
323 323 to_cow: impl Fn(
324 324 WithBasename<&'path HgPath>,
325 325 ) -> WithBasename<Cow<'on_disk, HgPath>>,
326 326 mut each_ancestor: impl FnMut(&mut Node),
327 327 ) -> &'tree mut Node<'on_disk> {
328 328 let mut child_nodes = root;
329 329 let mut inclusive_ancestor_paths =
330 330 WithBasename::inclusive_ancestors_of(path);
331 331 let mut ancestor_path = inclusive_ancestor_paths
332 332 .next()
333 333 .expect("expected at least one inclusive ancestor");
334 334 loop {
335 335 // TODO: can we avoid allocating an owned key in cases where the
336 336 // map already contains that key, without introducing double
337 337 // lookup?
338 338 let child_node = child_nodes
339 339 .make_mut()
340 340 .entry(to_cow(ancestor_path))
341 341 .or_default();
342 342 if let Some(next) = inclusive_ancestor_paths.next() {
343 343 each_ancestor(child_node);
344 344 ancestor_path = next;
345 345 child_nodes = &mut child_node.children;
346 346 } else {
347 347 return child_node;
348 348 }
349 349 }
350 350 }
351 351
352 352 fn add_or_remove_file(
353 353 &mut self,
354 354 path: &HgPath,
355 355 old_state: EntryState,
356 356 new_entry: DirstateEntry,
357 357 ) {
358 358 let tracked_count_increment =
359 359 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
360 360 (false, true) => 1,
361 361 (true, false) => -1,
362 362 _ => 0,
363 363 };
364 364
365 365 let node = Self::get_or_insert_node(
366 366 &mut self.root,
367 367 path,
368 368 WithBasename::to_cow_owned,
369 369 |ancestor| {
370 370 // We can’t use `+= increment` because the counter is unsigned,
371 371 // and we want debug builds to detect accidental underflow
372 372 // through zero
373 373 match tracked_count_increment {
374 374 1 => ancestor.tracked_descendants_count += 1,
375 375 -1 => ancestor.tracked_descendants_count -= 1,
376 376 _ => {}
377 377 }
378 378 },
379 379 );
380 380 if node.entry.is_none() {
381 381 self.nodes_with_entry_count += 1
382 382 }
383 383 node.entry = Some(new_entry)
384 384 }
385 385
386 386 fn iter_nodes<'tree>(
387 387 &'tree self,
388 388 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> + 'tree {
389 389 // Depth first tree traversal.
390 390 //
391 391 // If we could afford internal iteration and recursion,
392 392 // this would look like:
393 393 //
394 394 // ```
395 395 // fn traverse_children(
396 396 // children: &ChildNodes,
397 397 // each: &mut impl FnMut(&Node),
398 398 // ) {
399 399 // for child in children.values() {
400 400 // traverse_children(&child.children, each);
401 401 // each(child);
402 402 // }
403 403 // }
404 404 // ```
405 405 //
406 406 // However we want an external iterator and therefore can’t use the
407 407 // call stack. Use an explicit stack instead:
408 408 let mut stack = Vec::new();
409 409 let mut iter = self.root.as_ref().iter();
410 410 std::iter::from_fn(move || {
411 411 while let Some(child_node) = iter.next() {
412 412 // Pseudo-recursion
413 413 let new_iter = child_node.children().iter();
414 414 let old_iter = std::mem::replace(&mut iter, new_iter);
415 415 stack.push((child_node, old_iter));
416 416 }
417 417 // Found the end of a `children.iter()` iterator.
418 418 if let Some((child_node, next_iter)) = stack.pop() {
419 419 // "Return" from pseudo-recursion by restoring state from the
420 420 // explicit stack
421 421 iter = next_iter;
422 422
423 423 Some(child_node)
424 424 } else {
425 425 // Reached the bottom of the stack, we’re done
426 426 None
427 427 }
428 428 })
429 429 }
430 430
431 431 fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) {
432 432 for path in paths {
433 433 if let Some(node) =
434 434 Self::get_node_mut(&mut self.root, path.as_ref())
435 435 {
436 436 if let Some(entry) = node.entry.as_mut() {
437 437 entry.clear_mtime();
438 438 }
439 439 }
440 440 }
441 441 }
442 442 }
443 443
444 444 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
445 445 fn clear(&mut self) {
446 446 self.root = Default::default();
447 447 self.nodes_with_entry_count = 0;
448 448 self.nodes_with_copy_source_count = 0;
449 449 }
450 450
451 451 fn add_file(
452 452 &mut self,
453 453 filename: &HgPath,
454 454 old_state: EntryState,
455 455 entry: DirstateEntry,
456 456 ) -> Result<(), DirstateMapError> {
457 457 self.add_or_remove_file(filename, old_state, entry);
458 458 Ok(())
459 459 }
460 460
461 461 fn remove_file(
462 462 &mut self,
463 463 filename: &HgPath,
464 464 old_state: EntryState,
465 465 size: i32,
466 466 ) -> Result<(), DirstateMapError> {
467 467 let entry = DirstateEntry {
468 468 state: EntryState::Removed,
469 469 mode: 0,
470 470 size,
471 471 mtime: 0,
472 472 };
473 473 self.add_or_remove_file(filename, old_state, entry);
474 474 Ok(())
475 475 }
476 476
477 477 fn drop_file(
478 478 &mut self,
479 479 filename: &HgPath,
480 480 old_state: EntryState,
481 481 ) -> Result<bool, DirstateMapError> {
482 482 struct Dropped {
483 483 was_tracked: bool,
484 484 had_entry: bool,
485 485 had_copy_source: bool,
486 486 }
487 487 fn recur(nodes: &mut ChildNodes, path: &HgPath) -> Option<Dropped> {
488 488 let (first_path_component, rest_of_path) =
489 489 path.split_first_component();
490 490 let node = nodes.make_mut().get_mut(first_path_component)?;
491 491 let dropped;
492 492 if let Some(rest) = rest_of_path {
493 493 dropped = recur(&mut node.children, rest)?;
494 494 if dropped.was_tracked {
495 495 node.tracked_descendants_count -= 1;
496 496 }
497 497 } else {
498 498 dropped = Dropped {
499 499 was_tracked: node
500 500 .entry
501 501 .as_ref()
502 502 .map_or(false, |entry| entry.state.is_tracked()),
503 503 had_entry: node.entry.take().is_some(),
504 504 had_copy_source: node.copy_source.take().is_some(),
505 505 };
506 506 }
507 507 // After recursion, for both leaf (rest_of_path is None) nodes and
508 508 // parent nodes, remove a node if it just became empty.
509 509 if node.entry.is_none()
510 510 && node.copy_source.is_none()
511 511 && node.children.is_empty()
512 512 {
513 513 nodes.make_mut().remove(first_path_component);
514 514 }
515 515 Some(dropped)
516 516 }
517 517
518 518 if let Some(dropped) = recur(&mut self.root, filename) {
519 519 if dropped.had_entry {
520 520 self.nodes_with_entry_count -= 1
521 521 }
522 522 if dropped.had_copy_source {
523 523 self.nodes_with_copy_source_count -= 1
524 524 }
525 525 Ok(dropped.had_entry)
526 526 } else {
527 527 debug_assert!(!old_state.is_tracked());
528 528 Ok(false)
529 529 }
530 530 }
531 531
532 532 fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) {
533 533 for filename in filenames {
534 534 if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
535 535 if let Some(entry) = node.entry.as_mut() {
536 536 entry.clear_ambiguous_mtime(now);
537 537 }
538 538 }
539 539 }
540 540 }
541 541
542 542 fn non_normal_entries_contains(&mut self, key: &HgPath) -> bool {
543 543 self.get_node(key)
544 544 .and_then(|node| node.entry())
545 545 .map_or(false, |entry| entry.is_non_normal())
546 546 }
547 547
548 548 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
549 549 // Do nothing, this `DirstateMap` does not have a separate "non normal
550 550 // entries" set that need to be kept up to date
551 551 }
552 552
553 553 fn non_normal_or_other_parent_paths(
554 554 &mut self,
555 555 ) -> Box<dyn Iterator<Item = &HgPath> + '_> {
556 556 Box::new(self.iter_nodes().filter_map(|node| {
557 557 node.entry()
558 558 .filter(|entry| {
559 559 entry.is_non_normal() || entry.is_from_other_parent()
560 560 })
561 561 .map(|_| node.full_path())
562 562 }))
563 563 }
564 564
565 565 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
566 566 // Do nothing, this `DirstateMap` does not have a separate "non normal
567 567 // entries" and "from other parent" sets that need to be recomputed
568 568 }
569 569
570 570 fn iter_non_normal_paths(
571 571 &mut self,
572 572 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
573 573 self.iter_non_normal_paths_panic()
574 574 }
575 575
576 576 fn iter_non_normal_paths_panic(
577 577 &self,
578 578 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
579 579 Box::new(self.iter_nodes().filter_map(|node| {
580 580 node.entry()
581 581 .filter(|entry| entry.is_non_normal())
582 582 .map(|_| node.full_path())
583 583 }))
584 584 }
585 585
586 586 fn iter_other_parent_paths(
587 587 &mut self,
588 588 ) -> Box<dyn Iterator<Item = &HgPath> + Send + '_> {
589 589 Box::new(self.iter_nodes().filter_map(|node| {
590 590 node.entry()
591 591 .filter(|entry| entry.is_from_other_parent())
592 592 .map(|_| node.full_path())
593 593 }))
594 594 }
595 595
596 596 fn has_tracked_dir(
597 597 &mut self,
598 598 directory: &HgPath,
599 599 ) -> Result<bool, DirstateMapError> {
600 600 if let Some(node) = self.get_node(directory) {
601 601 // A node without a `DirstateEntry` was created to hold child
602 602 // nodes, and is therefore a directory.
603 603 Ok(!node.has_entry() && node.tracked_descendants_count() > 0)
604 604 } else {
605 605 Ok(false)
606 606 }
607 607 }
608 608
609 609 fn has_dir(
610 610 &mut self,
611 611 directory: &HgPath,
612 612 ) -> Result<bool, DirstateMapError> {
613 613 if let Some(node) = self.get_node(directory) {
614 614 // A node without a `DirstateEntry` was created to hold child
615 615 // nodes, and is therefore a directory.
616 616 Ok(!node.has_entry())
617 617 } else {
618 618 Ok(false)
619 619 }
620 620 }
621 621
622 622 #[timed]
623 623 fn pack_v1(
624 624 &mut self,
625 625 parents: DirstateParents,
626 626 now: Timestamp,
627 627 ) -> Result<Vec<u8>, DirstateError> {
628 628 let now: i32 = now.0.try_into().expect("time overflow");
629 629 let mut ambiguous_mtimes = Vec::new();
630 630 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
631 631 // reallocations
632 632 let mut size = parents.as_bytes().len();
633 633 for node in self.iter_nodes() {
634 634 if let Some(entry) = node.entry() {
635 635 size +=
636 636 packed_entry_size(node.full_path(), node.copy_source());
637 637 if entry.mtime_is_ambiguous(now) {
638 638 ambiguous_mtimes.push(node.full_path_cow())
639 639 }
640 640 }
641 641 }
642 642 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes);
643 643
644 644 let mut packed = Vec::with_capacity(size);
645 645 packed.extend(parents.as_bytes());
646 646
647 647 for node in self.iter_nodes() {
648 648 if let Some(entry) = node.entry() {
649 649 pack_entry(
650 650 node.full_path(),
651 651 &entry,
652 652 node.copy_source(),
653 653 &mut packed,
654 654 );
655 655 }
656 656 }
657 657 Ok(packed)
658 658 }
659 659
660 660 #[timed]
661 661 fn pack_v2(
662 662 &mut self,
663 663 parents: DirstateParents,
664 664 now: Timestamp,
665 665 ) -> Result<Vec<u8>, DirstateError> {
666 666 // TODO: how do we want to handle this in 2038?
667 667 let now: i32 = now.0.try_into().expect("time overflow");
668 668 let mut paths = Vec::new();
669 669 for node in self.iter_nodes() {
670 670 if let Some(entry) = node.entry() {
671 671 if entry.mtime_is_ambiguous(now) {
672 672 paths.push(node.full_path_cow())
673 673 }
674 674 }
675 675 }
676 676 // Borrow of `self` ends here since we collect cloned paths
677 677
678 678 self.clear_known_ambiguous_mtimes(&paths);
679 679
680 680 on_disk::write(self, parents)
681 681 }
682 682
683 683 fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
684 684 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
685 685 // needs to be recomputed
686 686 Ok(())
687 687 }
688 688
689 689 fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
690 690 // Do nothing, this `DirstateMap` does not a separate `dirs` that needs
691 691 // to be recomputed
692 692 Ok(())
693 693 }
694 694
695 695 fn status<'a>(
696 696 &'a mut self,
697 697 matcher: &'a (dyn Matcher + Sync),
698 698 root_dir: PathBuf,
699 699 ignore_files: Vec<PathBuf>,
700 700 options: StatusOptions,
701 701 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
702 702 {
703 703 super::status::status(self, matcher, root_dir, ignore_files, options)
704 704 }
705 705
706 706 fn copy_map_len(&self) -> usize {
707 707 self.nodes_with_copy_source_count as usize
708 708 }
709 709
710 710 fn copy_map_iter(&self) -> CopyMapIter<'_> {
711 711 Box::new(self.iter_nodes().filter_map(|node| {
712 712 node.copy_source()
713 713 .map(|copy_source| (node.full_path(), copy_source))
714 714 }))
715 715 }
716 716
717 717 fn copy_map_contains_key(&self, key: &HgPath) -> bool {
718 718 if let Some(node) = self.get_node(key) {
719 719 node.copy_source().is_some()
720 720 } else {
721 721 false
722 722 }
723 723 }
724 724
725 725 fn copy_map_get(&self, key: &HgPath) -> Option<&HgPath> {
726 726 self.get_node(key)?.copy_source()
727 727 }
728 728
729 729 fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> {
730 730 let count = &mut self.nodes_with_copy_source_count;
731 731 Self::get_node_mut(&mut self.root, key).and_then(|node| {
732 732 if node.copy_source.is_some() {
733 733 *count -= 1
734 734 }
735 735 node.copy_source.take().map(Cow::into_owned)
736 736 })
737 737 }
738 738
739 739 fn copy_map_insert(
740 740 &mut self,
741 741 key: HgPathBuf,
742 742 value: HgPathBuf,
743 743 ) -> Option<HgPathBuf> {
744 744 let node = Self::get_or_insert_node(
745 745 &mut self.root,
746 746 &key,
747 747 WithBasename::to_cow_owned,
748 748 |_ancestor| {},
749 749 );
750 750 if node.copy_source.is_none() {
751 751 self.nodes_with_copy_source_count += 1
752 752 }
753 753 node.copy_source.replace(value.into()).map(Cow::into_owned)
754 754 }
755 755
756 756 fn len(&self) -> usize {
757 757 self.nodes_with_entry_count as usize
758 758 }
759 759
760 760 fn contains_key(&self, key: &HgPath) -> bool {
761 761 self.get(key).is_some()
762 762 }
763 763
764 764 fn get(&self, key: &HgPath) -> Option<DirstateEntry> {
765 765 self.get_node(key)?.entry()
766 766 }
767 767
768 768 fn iter(&self) -> StateMapIter<'_> {
769 769 Box::new(self.iter_nodes().filter_map(|node| {
770 770 node.entry().map(|entry| (node.full_path(), entry))
771 771 }))
772 772 }
773 773 }
@@ -1,331 +1,357 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! # File format
4 4 //!
5 5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 7 //! the nodes representing the files and directories at the root of the
8 8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 10 //! nodes (if any) for nested files and directories.
11 11
12 12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
13 13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 14 use crate::errors::HgError;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateParents;
19 19 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
20 20 use bytes_cast::BytesCast;
21 21 use std::borrow::Cow;
22 22 use std::convert::{TryFrom, TryInto};
23 23
24 24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 25 /// This a redundant sanity check more than an actual "magic number" since
26 26 /// `.hg/requires` already governs which format should be used.
27 27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28 28
29 29 #[derive(BytesCast)]
30 30 #[repr(C)]
31 31 struct Header {
32 32 marker: [u8; V2_FORMAT_MARKER.len()],
33 33
34 34 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
35 35 /// `parents` field being at this offset, immediately after `marker`.
36 36 parents: DirstateParents,
37 37
38 38 root: ChildNodes,
39 39 nodes_with_entry_count: Size,
40 40 nodes_with_copy_source_count: Size,
41 41 }
42 42
43 43 #[derive(BytesCast)]
44 44 #[repr(C)]
45 45 struct Node {
46 46 full_path: PathSlice,
47 47
48 48 /// In bytes from `self.full_path.start`
49 49 base_name_start: Size,
50 50
51 51 copy_source: OptPathSlice,
52 52 entry: OptEntry,
53 53 children: ChildNodes,
54 54 tracked_descendants_count: Size,
55 55 }
56 56
57 57 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
58 58 /// format
59 59 #[derive(BytesCast)]
60 60 #[repr(C)]
61 61 struct OptEntry {
62 62 state: u8,
63 63 mode: I32Be,
64 64 mtime: I32Be,
65 65 size: I32Be,
66 66 }
67 67
68 68 /// Counted in bytes from the start of the file
69 69 ///
70 70 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
71 71 /// we could save space by using `U32Be` instead.
72 72 type Offset = U64Be;
73 73
74 74 /// Counted in number of items
75 75 ///
76 76 /// NOTE: not supporting directories with more than 4 billion direct children,
77 77 /// or filenames more than 4 GiB.
78 78 type Size = U32Be;
79 79
80 80 /// Location of consecutive, fixed-size items.
81 81 ///
82 82 /// An item can be a single byte for paths, or a struct with
83 83 /// `derive(BytesCast)`.
84 84 #[derive(BytesCast, Copy, Clone)]
85 85 #[repr(C)]
86 86 struct Slice {
87 87 start: Offset,
88 88 len: Size,
89 89 }
90 90
91 91 /// A contiguous sequence of `len` times `Node`, representing the child nodes
92 92 /// of either some other node or of the repository root.
93 93 ///
94 94 /// Always sorted by ascending `full_path`, to allow binary search.
95 95 /// Since nodes with the same parent nodes also have the same parent path,
96 96 /// only the `base_name`s need to be compared during binary search.
97 97 type ChildNodes = Slice;
98 98
99 99 /// A `HgPath` of `len` bytes
100 100 type PathSlice = Slice;
101 101
102 102 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
103 103 type OptPathSlice = Slice;
104 104
105 105 /// Make sure that size-affecting changes are made knowingly
106 106 fn _static_assert_size_of() {
107 107 let _ = std::mem::transmute::<Header, [u8; 72]>;
108 108 let _ = std::mem::transmute::<Node, [u8; 57]>;
109 109 }
110 110
111 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
112 pub(crate) struct DirstateV2ParseError;
113
114 impl From<DirstateV2ParseError> for HgError {
115 fn from(_: DirstateV2ParseError) -> Self {
116 HgError::corrupted("dirstate-v2 parse error")
117 }
118 }
119
120 impl From<DirstateV2ParseError> for crate::DirstateError {
121 fn from(error: DirstateV2ParseError) -> Self {
122 HgError::from(error).into()
123 }
124 }
125
111 126 pub(super) fn read<'on_disk>(
112 127 on_disk: &'on_disk [u8],
113 ) -> Result<(DirstateMap<'on_disk>, Option<DirstateParents>), DirstateError> {
128 ) -> Result<
129 (DirstateMap<'on_disk>, Option<DirstateParents>),
130 DirstateV2ParseError,
131 > {
114 132 if on_disk.is_empty() {
115 133 return Ok((DirstateMap::empty(on_disk), None));
116 134 }
117 let (header, _) = Header::from_bytes(on_disk)
118 .map_err(|_| HgError::corrupted("truncated dirstate-v2"))?;
135 let (header, _) =
136 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
119 137 let Header {
120 138 marker,
121 139 parents,
122 140 root,
123 141 nodes_with_entry_count,
124 142 nodes_with_copy_source_count,
125 143 } = header;
126 144 if marker != V2_FORMAT_MARKER {
127 return Err(HgError::corrupted("missing dirstated-v2 marker").into());
145 return Err(DirstateV2ParseError);
128 146 }
129 147 let dirstate_map = DirstateMap {
130 148 on_disk,
131 149 root: read_nodes(on_disk, *root)?,
132 150 nodes_with_entry_count: nodes_with_entry_count.get(),
133 151 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
134 152 };
135 153 let parents = Some(parents.clone());
136 154 Ok((dirstate_map, parents))
137 155 }
138 156
139 157 impl Node {
140 158 pub(super) fn path<'on_disk>(
141 159 &self,
142 160 on_disk: &'on_disk [u8],
143 ) -> Result<dirstate_map::NodeKey<'on_disk>, HgError> {
161 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
144 162 let full_path = read_hg_path(on_disk, self.full_path)?;
145 163 let base_name_start = usize::try_from(self.base_name_start.get())
146 164 // u32 -> usize, could only panic on a 16-bit CPU
147 165 .expect("dirstate-v2 base_name_start out of bounds");
148 166 if base_name_start < full_path.len() {
149 167 Ok(WithBasename::from_raw_parts(full_path, base_name_start))
150 168 } else {
151 Err(HgError::corrupted(
152 "dirstate-v2 base_name_start out of bounds",
153 ))
169 Err(DirstateV2ParseError)
154 170 }
155 171 }
156 172
157 173 pub(super) fn copy_source<'on_disk>(
158 174 &self,
159 175 on_disk: &'on_disk [u8],
160 ) -> Result<Option<Cow<'on_disk, HgPath>>, HgError> {
176 ) -> Result<Option<Cow<'on_disk, HgPath>>, DirstateV2ParseError> {
161 177 Ok(if self.copy_source.start.get() != 0 {
162 178 Some(read_hg_path(on_disk, self.copy_source)?)
163 179 } else {
164 180 None
165 181 })
166 182 }
167 183
168 pub(super) fn entry(&self) -> Result<Option<DirstateEntry>, HgError> {
184 pub(super) fn entry(
185 &self,
186 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
169 187 Ok(if self.entry.state != b'\0' {
170 188 Some(DirstateEntry {
171 state: self.entry.state.try_into()?,
189 state: self
190 .entry
191 .state
192 .try_into()
193 .map_err(|_| DirstateV2ParseError)?,
172 194 mode: self.entry.mode.get(),
173 195 mtime: self.entry.mtime.get(),
174 196 size: self.entry.size.get(),
175 197 })
176 198 } else {
177 199 None
178 200 })
179 201 }
180 202
181 203 pub(super) fn to_in_memory_node<'on_disk>(
182 204 &self,
183 205 on_disk: &'on_disk [u8],
184 ) -> Result<dirstate_map::Node<'on_disk>, HgError> {
206 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
185 207 Ok(dirstate_map::Node {
186 208 children: read_nodes(on_disk, self.children)?,
187 209 copy_source: self.copy_source(on_disk)?,
188 210 entry: self.entry()?,
189 211 tracked_descendants_count: self.tracked_descendants_count.get(),
190 212 })
191 213 }
192 214 }
193 215
194 216 fn read_nodes(
195 217 on_disk: &[u8],
196 218 slice: ChildNodes,
197 ) -> Result<dirstate_map::ChildNodes, HgError> {
219 ) -> Result<dirstate_map::ChildNodes, DirstateV2ParseError> {
198 220 read_slice::<Node>(on_disk, slice)?
199 221 .iter()
200 222 .map(|node| {
201 223 Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?))
202 224 })
203 225 .collect::<Result<_, _>>()
204 226 .map(dirstate_map::ChildNodes::InMemory)
205 227 }
206 228
207 fn read_hg_path(on_disk: &[u8], slice: Slice) -> Result<Cow<HgPath>, HgError> {
229 fn read_hg_path(
230 on_disk: &[u8],
231 slice: Slice,
232 ) -> Result<Cow<HgPath>, DirstateV2ParseError> {
208 233 let bytes = read_slice::<u8>(on_disk, slice)?;
209 234 Ok(Cow::Borrowed(HgPath::new(bytes)))
210 235 }
211 236
212 fn read_slice<T>(on_disk: &[u8], slice: Slice) -> Result<&[T], HgError>
237 fn read_slice<T>(
238 on_disk: &[u8],
239 slice: Slice,
240 ) -> Result<&[T], DirstateV2ParseError>
213 241 where
214 242 T: BytesCast,
215 243 {
216 244 // Either `usize::MAX` would result in "out of bounds" error since a single
217 245 // `&[u8]` cannot occupy the entire addess space.
218 246 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
219 247 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
220 248 on_disk
221 249 .get(start..)
222 250 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
223 251 .map(|(slice, _rest)| slice)
224 .ok_or_else(|| {
225 HgError::corrupted("dirstate v2 slice is out of bounds")
226 })
252 .ok_or_else(|| DirstateV2ParseError)
227 253 }
228 254
229 255 pub(super) fn write(
230 256 dirstate_map: &mut DirstateMap,
231 257 parents: DirstateParents,
232 258 ) -> Result<Vec<u8>, DirstateError> {
233 259 let header_len = std::mem::size_of::<Header>();
234 260
235 261 // This ignores the space for paths, and for nodes without an entry.
236 262 // TODO: better estimate? Skip the `Vec` and write to a file directly?
237 263 let size_guess = header_len
238 264 + std::mem::size_of::<Node>()
239 265 * dirstate_map.nodes_with_entry_count as usize;
240 266 let mut out = Vec::with_capacity(size_guess);
241 267
242 268 // Keep space for the header. We’ll fill it out at the end when we know the
243 269 // actual offset for the root nodes.
244 270 out.resize(header_len, 0_u8);
245 271
246 272 let root = write_nodes(dirstate_map.root.as_ref(), &mut out)?;
247 273
248 274 let header = Header {
249 275 marker: *V2_FORMAT_MARKER,
250 276 parents: parents,
251 277 root,
252 278 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
253 279 nodes_with_copy_source_count: dirstate_map
254 280 .nodes_with_copy_source_count
255 281 .into(),
256 282 };
257 283 out[..header_len].copy_from_slice(header.as_bytes());
258 284 Ok(out)
259 285 }
260 286
261 287 fn write_nodes(
262 288 nodes: dirstate_map::ChildNodesRef,
263 289 out: &mut Vec<u8>,
264 290 ) -> Result<ChildNodes, DirstateError> {
265 291 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
266 292 // order. Sort to enable binary search in the written file.
267 293 let nodes = nodes.sorted();
268 294
269 295 // First accumulate serialized nodes in a `Vec`
270 296 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
271 297 for node in nodes {
272 298 let children = write_nodes(node.children(), out)?;
273 299 let full_path = write_slice::<u8>(node.full_path().as_bytes(), out);
274 300 let copy_source = if let Some(source) = node.copy_source() {
275 301 write_slice::<u8>(source.as_bytes(), out)
276 302 } else {
277 303 Slice {
278 304 start: 0.into(),
279 305 len: 0.into(),
280 306 }
281 307 };
282 308 on_disk_nodes.push(match node {
283 309 NodeRef::InMemory(path, node) => Node {
284 310 children,
285 311 copy_source,
286 312 full_path,
287 313 base_name_start: u32::try_from(path.base_name_start())
288 314 // Could only panic for paths over 4 GiB
289 315 .expect("dirstate-v2 offset overflow")
290 316 .into(),
291 317 tracked_descendants_count: node
292 318 .tracked_descendants_count
293 319 .into(),
294 320 entry: if let Some(entry) = &node.entry {
295 321 OptEntry {
296 322 state: entry.state.into(),
297 323 mode: entry.mode.into(),
298 324 mtime: entry.mtime.into(),
299 325 size: entry.size.into(),
300 326 }
301 327 } else {
302 328 OptEntry {
303 329 state: b'\0',
304 330 mode: 0.into(),
305 331 mtime: 0.into(),
306 332 size: 0.into(),
307 333 }
308 334 },
309 335 },
310 336 })
311 337 }
312 338 // … so we can write them contiguously
313 339 Ok(write_slice::<Node>(&on_disk_nodes, out))
314 340 }
315 341
316 342 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
317 343 where
318 344 T: BytesCast,
319 345 {
320 346 let start = u64::try_from(out.len())
321 347 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
322 348 .expect("dirstate-v2 offset overflow")
323 349 .into();
324 350 let len = u32::try_from(slice.len())
325 351 // Could only panic for paths over 4 GiB or nodes with over 4 billions
326 352 // child nodes
327 353 .expect("dirstate-v2 offset overflow")
328 354 .into();
329 355 out.extend(slice.as_bytes());
330 356 Slice { start, len }
331 357 }
General Comments 0
You need to be logged in to leave comments. Login now