##// END OF EJS Templates
dirstate-v2: Parse the dirstate lazily, with copy-on-write nodes...
Simon Sapin -
r48128:0654b3b3 default
parent child Browse files
Show More
@@ -1,932 +1,995 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::convert::TryInto;
5 5 use std::path::PathBuf;
6 6
7 7 use super::on_disk;
8 8 use super::on_disk::DirstateV2ParseError;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::parsers::Timestamp;
14 14 use crate::matchers::Matcher;
15 15 use crate::utils::hg_path::{HgPath, HgPathBuf};
16 16 use crate::CopyMapIter;
17 17 use crate::DirstateEntry;
18 18 use crate::DirstateError;
19 19 use crate::DirstateParents;
20 20 use crate::DirstateStatus;
21 21 use crate::EntryState;
22 22 use crate::FastHashMap;
23 23 use crate::PatternFileWarning;
24 24 use crate::StateMapIter;
25 25 use crate::StatusError;
26 26 use crate::StatusOptions;
27 27
28 28 pub struct DirstateMap<'on_disk> {
29 29 /// Contents of the `.hg/dirstate` file
30 30 pub(super) on_disk: &'on_disk [u8],
31 31
32 32 pub(super) root: ChildNodes<'on_disk>,
33 33
34 34 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
35 35 pub(super) nodes_with_entry_count: u32,
36 36
37 37 /// Number of nodes anywhere in the tree that have
38 38 /// `.copy_source.is_some()`.
39 39 pub(super) nodes_with_copy_source_count: u32,
40 40 }
41 41
42 42 /// Using a plain `HgPathBuf` of the full path from the repository root as a
43 43 /// map key would also work: all paths in a given map have the same parent
44 44 /// path, so comparing full paths gives the same result as comparing base
45 45 /// names. However `HashMap` would waste time always re-hashing the same
46 46 /// string prefix.
47 47 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
48 48
49 49 pub(super) enum ChildNodes<'on_disk> {
50 50 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
51 OnDisk(&'on_disk [on_disk::Node]),
51 52 }
52 53
53 54 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
54 55 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
56 OnDisk(&'on_disk [on_disk::Node]),
55 57 }
56 58
57 59 pub(super) enum NodeRef<'tree, 'on_disk> {
58 60 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
61 OnDisk(&'on_disk on_disk::Node),
59 62 }
60 63
61 64 impl Default for ChildNodes<'_> {
62 65 fn default() -> Self {
63 66 ChildNodes::InMemory(Default::default())
64 67 }
65 68 }
66 69
67 70 impl<'on_disk> ChildNodes<'on_disk> {
68 71 pub(super) fn as_ref<'tree>(
69 72 &'tree self,
70 73 ) -> ChildNodesRef<'tree, 'on_disk> {
71 74 match self {
72 75 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
76 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
73 77 }
74 78 }
75 79
76 80 pub(super) fn is_empty(&self) -> bool {
77 81 match self {
78 82 ChildNodes::InMemory(nodes) => nodes.is_empty(),
83 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
79 84 }
80 85 }
81 86
82 87 pub(super) fn make_mut(
83 88 &mut self,
84 _on_disk: &'on_disk [u8],
89 on_disk: &'on_disk [u8],
85 90 ) -> Result<
86 91 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
87 92 DirstateV2ParseError,
88 93 > {
89 94 match self {
90 95 ChildNodes::InMemory(nodes) => Ok(nodes),
96 ChildNodes::OnDisk(nodes) => {
97 let nodes = nodes
98 .iter()
99 .map(|node| {
100 Ok((
101 node.path(on_disk)?,
102 node.to_in_memory_node(on_disk)?,
103 ))
104 })
105 .collect::<Result<_, _>>()?;
106 *self = ChildNodes::InMemory(nodes);
107 match self {
108 ChildNodes::InMemory(nodes) => Ok(nodes),
109 ChildNodes::OnDisk(_) => unreachable!(),
110 }
111 }
91 112 }
92 113 }
93 114 }
94 115
95 116 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
96 117 pub(super) fn get(
97 118 &self,
98 119 base_name: &HgPath,
99 _on_disk: &'on_disk [u8],
120 on_disk: &'on_disk [u8],
100 121 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
101 122 match self {
102 123 ChildNodesRef::InMemory(nodes) => Ok(nodes
103 124 .get_key_value(base_name)
104 125 .map(|(k, v)| NodeRef::InMemory(k, v))),
126 ChildNodesRef::OnDisk(nodes) => {
127 let mut parse_result = Ok(());
128 let search_result = nodes.binary_search_by(|node| {
129 match node.base_name(on_disk) {
130 Ok(node_base_name) => node_base_name.cmp(base_name),
131 Err(e) => {
132 parse_result = Err(e);
133 // Dummy comparison result, `search_result` won’t
134 // be used since `parse_result` is an error
135 std::cmp::Ordering::Equal
136 }
137 }
138 });
139 parse_result.map(|()| {
140 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
141 })
142 }
105 143 }
106 144 }
107 145
108 146 /// Iterate in undefined order
109 147 pub(super) fn iter(
110 148 &self,
111 149 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
112 150 match self {
113 ChildNodesRef::InMemory(nodes) => {
114 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v))
151 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
152 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
153 ),
154 ChildNodesRef::OnDisk(nodes) => {
155 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
115 156 }
116 157 }
117 158 }
118 159
119 160 /// Iterate in parallel in undefined order
120 161 pub(super) fn par_iter(
121 162 &self,
122 163 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
123 164 {
124 165 use rayon::prelude::*;
125 166 match self {
126 ChildNodesRef::InMemory(nodes) => {
127 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v))
128 }
167 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
168 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
169 ),
170 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
171 nodes.par_iter().map(NodeRef::OnDisk),
172 ),
129 173 }
130 174 }
131 175
132 176 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
133 177 match self {
134 178 ChildNodesRef::InMemory(nodes) => {
135 179 let mut vec: Vec<_> = nodes
136 180 .iter()
137 181 .map(|(k, v)| NodeRef::InMemory(k, v))
138 182 .collect();
139 183 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
140 184 match node {
141 185 NodeRef::InMemory(path, _node) => path.base_name(),
186 NodeRef::OnDisk(_) => unreachable!(),
142 187 }
143 188 }
144 189 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
145 190 // value: https://github.com/rust-lang/rust/issues/34162
146 191 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
147 192 vec
148 193 }
194 ChildNodesRef::OnDisk(nodes) => {
195 // Nodes on disk are already sorted
196 nodes.iter().map(NodeRef::OnDisk).collect()
197 }
149 198 }
150 199 }
151 200 }
152 201
153 202 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
154 203 pub(super) fn full_path(
155 204 &self,
156 _on_disk: &'on_disk [u8],
205 on_disk: &'on_disk [u8],
157 206 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
158 207 match self {
159 208 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
209 NodeRef::OnDisk(node) => node.full_path(on_disk),
160 210 }
161 211 }
162 212
163 213 /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree
164 214 pub(super) fn full_path_cow(
165 215 &self,
166 _on_disk: &'on_disk [u8],
216 on_disk: &'on_disk [u8],
167 217 ) -> Result<Cow<'on_disk, HgPath>, DirstateV2ParseError> {
168 218 match self {
169 219 NodeRef::InMemory(path, _node) => Ok(path.full_path().clone()),
220 NodeRef::OnDisk(node) => {
221 Ok(Cow::Borrowed(node.full_path(on_disk)?))
222 }
170 223 }
171 224 }
172 225
173 226 pub(super) fn base_name(
174 227 &self,
175 _on_disk: &'on_disk [u8],
228 on_disk: &'on_disk [u8],
176 229 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
177 230 match self {
178 231 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
232 NodeRef::OnDisk(node) => node.base_name(on_disk),
179 233 }
180 234 }
181 235
182 236 pub(super) fn children(
183 237 &self,
184 _on_disk: &'on_disk [u8],
238 on_disk: &'on_disk [u8],
185 239 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
186 240 match self {
187 241 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
242 NodeRef::OnDisk(node) => {
243 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
244 }
188 245 }
189 246 }
190 247
191 248 pub(super) fn has_copy_source(&self) -> bool {
192 249 match self {
193 250 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
251 NodeRef::OnDisk(node) => node.has_copy_source(),
194 252 }
195 253 }
196 254
197 255 pub(super) fn copy_source(
198 256 &self,
199 _on_disk: &'on_disk [u8],
257 on_disk: &'on_disk [u8],
200 258 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
201 259 match self {
202 260 NodeRef::InMemory(_path, node) => {
203 261 Ok(node.copy_source.as_ref().map(|s| &**s))
204 262 }
263 NodeRef::OnDisk(node) => node.copy_source(on_disk),
205 264 }
206 265 }
207 266
208 267 pub(super) fn has_entry(&self) -> bool {
209 268 match self {
210 269 NodeRef::InMemory(_path, node) => node.entry.is_some(),
270 NodeRef::OnDisk(node) => node.has_entry(),
211 271 }
212 272 }
213 273
214 274 pub(super) fn entry(
215 275 &self,
216 276 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
217 277 match self {
218 278 NodeRef::InMemory(_path, node) => Ok(node.entry),
279 NodeRef::OnDisk(node) => node.entry(),
219 280 }
220 281 }
221 282
222 283 pub(super) fn state(
223 284 &self,
224 285 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
225 286 match self {
226 287 NodeRef::InMemory(_path, node) => {
227 288 Ok(node.entry.as_ref().map(|entry| entry.state))
228 289 }
290 NodeRef::OnDisk(node) => node.state(),
229 291 }
230 292 }
231 293
232 294 pub(super) fn tracked_descendants_count(&self) -> u32 {
233 295 match self {
234 296 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
297 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
235 298 }
236 299 }
237 300 }
238 301
239 302 /// Represents a file or a directory
240 303 #[derive(Default)]
241 304 pub(super) struct Node<'on_disk> {
242 305 /// `None` for directories
243 306 pub(super) entry: Option<DirstateEntry>,
244 307
245 308 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
246 309
247 310 pub(super) children: ChildNodes<'on_disk>,
248 311
249 312 /// How many (non-inclusive) descendants of this node are tracked files
250 313 pub(super) tracked_descendants_count: u32,
251 314 }
252 315
253 316 impl<'on_disk> DirstateMap<'on_disk> {
254 317 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
255 318 Self {
256 319 on_disk,
257 320 root: ChildNodes::default(),
258 321 nodes_with_entry_count: 0,
259 322 nodes_with_copy_source_count: 0,
260 323 }
261 324 }
262 325
263 326 #[timed]
264 327 pub fn new_v2(
265 328 on_disk: &'on_disk [u8],
266 329 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
267 330 Ok(on_disk::read(on_disk)?)
268 331 }
269 332
270 333 #[timed]
271 334 pub fn new_v1(
272 335 on_disk: &'on_disk [u8],
273 336 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
274 337 let mut map = Self::empty(on_disk);
275 338 if map.on_disk.is_empty() {
276 339 return Ok((map, None));
277 340 }
278 341
279 342 let parents = parse_dirstate_entries(
280 343 map.on_disk,
281 344 |path, entry, copy_source| {
282 345 let tracked = entry.state.is_tracked();
283 346 let node = Self::get_or_insert_node(
284 347 map.on_disk,
285 348 &mut map.root,
286 349 path,
287 350 WithBasename::to_cow_borrowed,
288 351 |ancestor| {
289 352 if tracked {
290 353 ancestor.tracked_descendants_count += 1
291 354 }
292 355 },
293 356 )?;
294 357 assert!(
295 358 node.entry.is_none(),
296 359 "duplicate dirstate entry in read"
297 360 );
298 361 assert!(
299 362 node.copy_source.is_none(),
300 363 "duplicate dirstate entry in read"
301 364 );
302 365 node.entry = Some(*entry);
303 366 node.copy_source = copy_source.map(Cow::Borrowed);
304 367 map.nodes_with_entry_count += 1;
305 368 if copy_source.is_some() {
306 369 map.nodes_with_copy_source_count += 1
307 370 }
308 371 Ok(())
309 372 },
310 373 )?;
311 374 let parents = Some(parents.clone());
312 375
313 376 Ok((map, parents))
314 377 }
315 378
316 379 fn get_node<'tree>(
317 380 &'tree self,
318 381 path: &HgPath,
319 382 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
320 383 let mut children = self.root.as_ref();
321 384 let mut components = path.components();
322 385 let mut component =
323 386 components.next().expect("expected at least one components");
324 387 loop {
325 388 if let Some(child) = children.get(component, self.on_disk)? {
326 389 if let Some(next_component) = components.next() {
327 390 component = next_component;
328 391 children = child.children(self.on_disk)?;
329 392 } else {
330 393 return Ok(Some(child));
331 394 }
332 395 } else {
333 396 return Ok(None);
334 397 }
335 398 }
336 399 }
337 400
338 401 /// Returns a mutable reference to the node at `path` if it exists
339 402 ///
340 403 /// This takes `root` instead of `&mut self` so that callers can mutate
341 404 /// other fields while the returned borrow is still valid
342 405 fn get_node_mut<'tree>(
343 406 on_disk: &'on_disk [u8],
344 407 root: &'tree mut ChildNodes<'on_disk>,
345 408 path: &HgPath,
346 409 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
347 410 let mut children = root;
348 411 let mut components = path.components();
349 412 let mut component =
350 413 components.next().expect("expected at least one components");
351 414 loop {
352 415 if let Some(child) = children.make_mut(on_disk)?.get_mut(component)
353 416 {
354 417 if let Some(next_component) = components.next() {
355 418 component = next_component;
356 419 children = &mut child.children;
357 420 } else {
358 421 return Ok(Some(child));
359 422 }
360 423 } else {
361 424 return Ok(None);
362 425 }
363 426 }
364 427 }
365 428
366 429 fn get_or_insert_node<'tree, 'path>(
367 430 on_disk: &'on_disk [u8],
368 431 root: &'tree mut ChildNodes<'on_disk>,
369 432 path: &'path HgPath,
370 433 to_cow: impl Fn(
371 434 WithBasename<&'path HgPath>,
372 435 ) -> WithBasename<Cow<'on_disk, HgPath>>,
373 436 mut each_ancestor: impl FnMut(&mut Node),
374 437 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
375 438 let mut child_nodes = root;
376 439 let mut inclusive_ancestor_paths =
377 440 WithBasename::inclusive_ancestors_of(path);
378 441 let mut ancestor_path = inclusive_ancestor_paths
379 442 .next()
380 443 .expect("expected at least one inclusive ancestor");
381 444 loop {
382 445 // TODO: can we avoid allocating an owned key in cases where the
383 446 // map already contains that key, without introducing double
384 447 // lookup?
385 448 let child_node = child_nodes
386 449 .make_mut(on_disk)?
387 450 .entry(to_cow(ancestor_path))
388 451 .or_default();
389 452 if let Some(next) = inclusive_ancestor_paths.next() {
390 453 each_ancestor(child_node);
391 454 ancestor_path = next;
392 455 child_nodes = &mut child_node.children;
393 456 } else {
394 457 return Ok(child_node);
395 458 }
396 459 }
397 460 }
398 461
399 462 fn add_or_remove_file(
400 463 &mut self,
401 464 path: &HgPath,
402 465 old_state: EntryState,
403 466 new_entry: DirstateEntry,
404 467 ) -> Result<(), DirstateV2ParseError> {
405 468 let tracked_count_increment =
406 469 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
407 470 (false, true) => 1,
408 471 (true, false) => -1,
409 472 _ => 0,
410 473 };
411 474
412 475 let node = Self::get_or_insert_node(
413 476 self.on_disk,
414 477 &mut self.root,
415 478 path,
416 479 WithBasename::to_cow_owned,
417 480 |ancestor| {
418 481 // We can’t use `+= increment` because the counter is unsigned,
419 482 // and we want debug builds to detect accidental underflow
420 483 // through zero
421 484 match tracked_count_increment {
422 485 1 => ancestor.tracked_descendants_count += 1,
423 486 -1 => ancestor.tracked_descendants_count -= 1,
424 487 _ => {}
425 488 }
426 489 },
427 490 )?;
428 491 if node.entry.is_none() {
429 492 self.nodes_with_entry_count += 1
430 493 }
431 494 node.entry = Some(new_entry);
432 495 Ok(())
433 496 }
434 497
435 498 fn iter_nodes<'tree>(
436 499 &'tree self,
437 500 ) -> impl Iterator<
438 501 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
439 502 > + 'tree {
440 503 // Depth first tree traversal.
441 504 //
442 505 // If we could afford internal iteration and recursion,
443 506 // this would look like:
444 507 //
445 508 // ```
446 509 // fn traverse_children(
447 510 // children: &ChildNodes,
448 511 // each: &mut impl FnMut(&Node),
449 512 // ) {
450 513 // for child in children.values() {
451 514 // traverse_children(&child.children, each);
452 515 // each(child);
453 516 // }
454 517 // }
455 518 // ```
456 519 //
457 520 // However we want an external iterator and therefore can’t use the
458 521 // call stack. Use an explicit stack instead:
459 522 let mut stack = Vec::new();
460 523 let mut iter = self.root.as_ref().iter();
461 524 std::iter::from_fn(move || {
462 525 while let Some(child_node) = iter.next() {
463 526 let children = match child_node.children(self.on_disk) {
464 527 Ok(children) => children,
465 528 Err(error) => return Some(Err(error)),
466 529 };
467 530 // Pseudo-recursion
468 531 let new_iter = children.iter();
469 532 let old_iter = std::mem::replace(&mut iter, new_iter);
470 533 stack.push((child_node, old_iter));
471 534 }
472 535 // Found the end of a `children.iter()` iterator.
473 536 if let Some((child_node, next_iter)) = stack.pop() {
474 537 // "Return" from pseudo-recursion by restoring state from the
475 538 // explicit stack
476 539 iter = next_iter;
477 540
478 541 Some(Ok(child_node))
479 542 } else {
480 543 // Reached the bottom of the stack, we’re done
481 544 None
482 545 }
483 546 })
484 547 }
485 548
486 549 fn clear_known_ambiguous_mtimes(
487 550 &mut self,
488 551 paths: &[impl AsRef<HgPath>],
489 552 ) -> Result<(), DirstateV2ParseError> {
490 553 for path in paths {
491 554 if let Some(node) = Self::get_node_mut(
492 555 self.on_disk,
493 556 &mut self.root,
494 557 path.as_ref(),
495 558 )? {
496 559 if let Some(entry) = node.entry.as_mut() {
497 560 entry.clear_mtime();
498 561 }
499 562 }
500 563 }
501 564 Ok(())
502 565 }
503 566
504 567 /// Return a faillilble iterator of full paths of nodes that have an
505 568 /// `entry` for which the given `predicate` returns true.
506 569 ///
507 570 /// Fallibility means that each iterator item is a `Result`, which may
508 571 /// indicate a parse error of the on-disk dirstate-v2 format. Such errors
509 572 /// should only happen if Mercurial is buggy or a repository is corrupted.
510 573 fn filter_full_paths<'tree>(
511 574 &'tree self,
512 575 predicate: impl Fn(&DirstateEntry) -> bool + 'tree,
513 576 ) -> impl Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + 'tree
514 577 {
515 578 filter_map_results(self.iter_nodes(), move |node| {
516 579 if let Some(entry) = node.entry()? {
517 580 if predicate(&entry) {
518 581 return Ok(Some(node.full_path(self.on_disk)?));
519 582 }
520 583 }
521 584 Ok(None)
522 585 })
523 586 }
524 587 }
525 588
526 589 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
527 590 ///
528 591 /// The callback is only called for incoming `Ok` values. Errors are passed
529 592 /// through as-is. In order to let it use the `?` operator the callback is
530 593 /// expected to return a `Result` of `Option`, instead of an `Option` of
531 594 /// `Result`.
532 595 fn filter_map_results<'a, I, F, A, B, E>(
533 596 iter: I,
534 597 f: F,
535 598 ) -> impl Iterator<Item = Result<B, E>> + 'a
536 599 where
537 600 I: Iterator<Item = Result<A, E>> + 'a,
538 601 F: Fn(A) -> Result<Option<B>, E> + 'a,
539 602 {
540 603 iter.filter_map(move |result| match result {
541 604 Ok(node) => f(node).transpose(),
542 605 Err(e) => Some(Err(e)),
543 606 })
544 607 }
545 608
546 609 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
547 610 fn clear(&mut self) {
548 611 self.root = Default::default();
549 612 self.nodes_with_entry_count = 0;
550 613 self.nodes_with_copy_source_count = 0;
551 614 }
552 615
553 616 fn add_file(
554 617 &mut self,
555 618 filename: &HgPath,
556 619 old_state: EntryState,
557 620 entry: DirstateEntry,
558 621 ) -> Result<(), DirstateError> {
559 622 Ok(self.add_or_remove_file(filename, old_state, entry)?)
560 623 }
561 624
562 625 fn remove_file(
563 626 &mut self,
564 627 filename: &HgPath,
565 628 old_state: EntryState,
566 629 size: i32,
567 630 ) -> Result<(), DirstateError> {
568 631 let entry = DirstateEntry {
569 632 state: EntryState::Removed,
570 633 mode: 0,
571 634 size,
572 635 mtime: 0,
573 636 };
574 637 Ok(self.add_or_remove_file(filename, old_state, entry)?)
575 638 }
576 639
577 640 fn drop_file(
578 641 &mut self,
579 642 filename: &HgPath,
580 643 old_state: EntryState,
581 644 ) -> Result<bool, DirstateError> {
582 645 struct Dropped {
583 646 was_tracked: bool,
584 647 had_entry: bool,
585 648 had_copy_source: bool,
586 649 }
587 650 fn recur<'on_disk>(
588 651 on_disk: &'on_disk [u8],
589 652 nodes: &mut ChildNodes<'on_disk>,
590 653 path: &HgPath,
591 654 ) -> Result<Option<Dropped>, DirstateV2ParseError> {
592 655 let (first_path_component, rest_of_path) =
593 656 path.split_first_component();
594 657 let node = if let Some(node) =
595 658 nodes.make_mut(on_disk)?.get_mut(first_path_component)
596 659 {
597 660 node
598 661 } else {
599 662 return Ok(None);
600 663 };
601 664 let dropped;
602 665 if let Some(rest) = rest_of_path {
603 666 if let Some(d) = recur(on_disk, &mut node.children, rest)? {
604 667 dropped = d;
605 668 if dropped.was_tracked {
606 669 node.tracked_descendants_count -= 1;
607 670 }
608 671 } else {
609 672 return Ok(None);
610 673 }
611 674 } else {
612 675 dropped = Dropped {
613 676 was_tracked: node
614 677 .entry
615 678 .as_ref()
616 679 .map_or(false, |entry| entry.state.is_tracked()),
617 680 had_entry: node.entry.take().is_some(),
618 681 had_copy_source: node.copy_source.take().is_some(),
619 682 };
620 683 }
621 684 // After recursion, for both leaf (rest_of_path is None) nodes and
622 685 // parent nodes, remove a node if it just became empty.
623 686 if node.entry.is_none()
624 687 && node.copy_source.is_none()
625 688 && node.children.is_empty()
626 689 {
627 690 nodes.make_mut(on_disk)?.remove(first_path_component);
628 691 }
629 692 Ok(Some(dropped))
630 693 }
631 694
632 695 if let Some(dropped) = recur(self.on_disk, &mut self.root, filename)? {
633 696 if dropped.had_entry {
634 697 self.nodes_with_entry_count -= 1
635 698 }
636 699 if dropped.had_copy_source {
637 700 self.nodes_with_copy_source_count -= 1
638 701 }
639 702 Ok(dropped.had_entry)
640 703 } else {
641 704 debug_assert!(!old_state.is_tracked());
642 705 Ok(false)
643 706 }
644 707 }
645 708
646 709 fn clear_ambiguous_times(
647 710 &mut self,
648 711 filenames: Vec<HgPathBuf>,
649 712 now: i32,
650 713 ) -> Result<(), DirstateV2ParseError> {
651 714 for filename in filenames {
652 715 if let Some(node) =
653 716 Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
654 717 {
655 718 if let Some(entry) = node.entry.as_mut() {
656 719 entry.clear_ambiguous_mtime(now);
657 720 }
658 721 }
659 722 }
660 723 Ok(())
661 724 }
662 725
663 726 fn non_normal_entries_contains(
664 727 &mut self,
665 728 key: &HgPath,
666 729 ) -> Result<bool, DirstateV2ParseError> {
667 730 Ok(if let Some(node) = self.get_node(key)? {
668 731 node.entry()?.map_or(false, |entry| entry.is_non_normal())
669 732 } else {
670 733 false
671 734 })
672 735 }
673 736
674 737 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
675 738 // Do nothing, this `DirstateMap` does not have a separate "non normal
676 739 // entries" set that need to be kept up to date
677 740 }
678 741
679 742 fn non_normal_or_other_parent_paths(
680 743 &mut self,
681 744 ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>
682 745 {
683 746 Box::new(self.filter_full_paths(|entry| {
684 747 entry.is_non_normal() || entry.is_from_other_parent()
685 748 }))
686 749 }
687 750
688 751 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
689 752 // Do nothing, this `DirstateMap` does not have a separate "non normal
690 753 // entries" and "from other parent" sets that need to be recomputed
691 754 }
692 755
693 756 fn iter_non_normal_paths(
694 757 &mut self,
695 758 ) -> Box<
696 759 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
697 760 > {
698 761 self.iter_non_normal_paths_panic()
699 762 }
700 763
701 764 fn iter_non_normal_paths_panic(
702 765 &self,
703 766 ) -> Box<
704 767 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
705 768 > {
706 769 Box::new(self.filter_full_paths(|entry| entry.is_non_normal()))
707 770 }
708 771
709 772 fn iter_other_parent_paths(
710 773 &mut self,
711 774 ) -> Box<
712 775 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
713 776 > {
714 777 Box::new(self.filter_full_paths(|entry| entry.is_from_other_parent()))
715 778 }
716 779
717 780 fn has_tracked_dir(
718 781 &mut self,
719 782 directory: &HgPath,
720 783 ) -> Result<bool, DirstateError> {
721 784 if let Some(node) = self.get_node(directory)? {
722 785 // A node without a `DirstateEntry` was created to hold child
723 786 // nodes, and is therefore a directory.
724 787 Ok(!node.has_entry() && node.tracked_descendants_count() > 0)
725 788 } else {
726 789 Ok(false)
727 790 }
728 791 }
729 792
730 793 fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> {
731 794 if let Some(node) = self.get_node(directory)? {
732 795 // A node without a `DirstateEntry` was created to hold child
733 796 // nodes, and is therefore a directory.
734 797 Ok(!node.has_entry())
735 798 } else {
736 799 Ok(false)
737 800 }
738 801 }
739 802
740 803 #[timed]
741 804 fn pack_v1(
742 805 &mut self,
743 806 parents: DirstateParents,
744 807 now: Timestamp,
745 808 ) -> Result<Vec<u8>, DirstateError> {
746 809 let now: i32 = now.0.try_into().expect("time overflow");
747 810 let mut ambiguous_mtimes = Vec::new();
748 811 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
749 812 // reallocations
750 813 let mut size = parents.as_bytes().len();
751 814 for node in self.iter_nodes() {
752 815 let node = node?;
753 816 if let Some(entry) = node.entry()? {
754 817 size += packed_entry_size(
755 818 node.full_path(self.on_disk)?,
756 819 node.copy_source(self.on_disk)?,
757 820 );
758 821 if entry.mtime_is_ambiguous(now) {
759 822 ambiguous_mtimes.push(node.full_path_cow(self.on_disk)?)
760 823 }
761 824 }
762 825 }
763 826 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
764 827
765 828 let mut packed = Vec::with_capacity(size);
766 829 packed.extend(parents.as_bytes());
767 830
768 831 for node in self.iter_nodes() {
769 832 let node = node?;
770 833 if let Some(entry) = node.entry()? {
771 834 pack_entry(
772 835 node.full_path(self.on_disk)?,
773 836 &entry,
774 837 node.copy_source(self.on_disk)?,
775 838 &mut packed,
776 839 );
777 840 }
778 841 }
779 842 Ok(packed)
780 843 }
781 844
782 845 #[timed]
783 846 fn pack_v2(
784 847 &mut self,
785 848 parents: DirstateParents,
786 849 now: Timestamp,
787 850 ) -> Result<Vec<u8>, DirstateError> {
788 851 // TODO: how do we want to handle this in 2038?
789 852 let now: i32 = now.0.try_into().expect("time overflow");
790 853 let mut paths = Vec::new();
791 854 for node in self.iter_nodes() {
792 855 let node = node?;
793 856 if let Some(entry) = node.entry()? {
794 857 if entry.mtime_is_ambiguous(now) {
795 858 paths.push(node.full_path_cow(self.on_disk)?)
796 859 }
797 860 }
798 861 }
799 862 // Borrow of `self` ends here since we collect cloned paths
800 863
801 864 self.clear_known_ambiguous_mtimes(&paths)?;
802 865
803 866 on_disk::write(self, parents)
804 867 }
805 868
806 869 fn set_all_dirs(&mut self) -> Result<(), DirstateError> {
807 870 // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
808 871 // needs to be recomputed
809 872 Ok(())
810 873 }
811 874
812 875 fn set_dirs(&mut self) -> Result<(), DirstateError> {
813 876 // Do nothing, this `DirstateMap` does not a separate `dirs` that needs
814 877 // to be recomputed
815 878 Ok(())
816 879 }
817 880
818 881 fn status<'a>(
819 882 &'a mut self,
820 883 matcher: &'a (dyn Matcher + Sync),
821 884 root_dir: PathBuf,
822 885 ignore_files: Vec<PathBuf>,
823 886 options: StatusOptions,
824 887 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
825 888 {
826 889 super::status::status(self, matcher, root_dir, ignore_files, options)
827 890 }
828 891
829 892 fn copy_map_len(&self) -> usize {
830 893 self.nodes_with_copy_source_count as usize
831 894 }
832 895
833 896 fn copy_map_iter(&self) -> CopyMapIter<'_> {
834 897 Box::new(filter_map_results(self.iter_nodes(), move |node| {
835 898 Ok(if let Some(source) = node.copy_source(self.on_disk)? {
836 899 Some((node.full_path(self.on_disk)?, source))
837 900 } else {
838 901 None
839 902 })
840 903 }))
841 904 }
842 905
843 906 fn copy_map_contains_key(
844 907 &self,
845 908 key: &HgPath,
846 909 ) -> Result<bool, DirstateV2ParseError> {
847 910 Ok(if let Some(node) = self.get_node(key)? {
848 911 node.has_copy_source()
849 912 } else {
850 913 false
851 914 })
852 915 }
853 916
854 917 fn copy_map_get(
855 918 &self,
856 919 key: &HgPath,
857 920 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
858 921 if let Some(node) = self.get_node(key)? {
859 922 if let Some(source) = node.copy_source(self.on_disk)? {
860 923 return Ok(Some(source));
861 924 }
862 925 }
863 926 Ok(None)
864 927 }
865 928
866 929 fn copy_map_remove(
867 930 &mut self,
868 931 key: &HgPath,
869 932 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
870 933 let count = &mut self.nodes_with_copy_source_count;
871 934 Ok(
872 935 Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then(
873 936 |node| {
874 937 if node.copy_source.is_some() {
875 938 *count -= 1
876 939 }
877 940 node.copy_source.take().map(Cow::into_owned)
878 941 },
879 942 ),
880 943 )
881 944 }
882 945
883 946 fn copy_map_insert(
884 947 &mut self,
885 948 key: HgPathBuf,
886 949 value: HgPathBuf,
887 950 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
888 951 let node = Self::get_or_insert_node(
889 952 self.on_disk,
890 953 &mut self.root,
891 954 &key,
892 955 WithBasename::to_cow_owned,
893 956 |_ancestor| {},
894 957 )?;
895 958 if node.copy_source.is_none() {
896 959 self.nodes_with_copy_source_count += 1
897 960 }
898 961 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
899 962 }
900 963
901 964 fn len(&self) -> usize {
902 965 self.nodes_with_entry_count as usize
903 966 }
904 967
905 968 fn contains_key(
906 969 &self,
907 970 key: &HgPath,
908 971 ) -> Result<bool, DirstateV2ParseError> {
909 972 Ok(self.get(key)?.is_some())
910 973 }
911 974
912 975 fn get(
913 976 &self,
914 977 key: &HgPath,
915 978 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
916 979 Ok(if let Some(node) = self.get_node(key)? {
917 980 node.entry()?
918 981 } else {
919 982 None
920 983 })
921 984 }
922 985
923 986 fn iter(&self) -> StateMapIter<'_> {
924 987 Box::new(filter_map_results(self.iter_nodes(), move |node| {
925 988 Ok(if let Some(entry) = node.entry()? {
926 989 Some((node.full_path(self.on_disk)?, entry))
927 990 } else {
928 991 None
929 992 })
930 993 }))
931 994 }
932 995 }
@@ -1,365 +1,413 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! # File format
4 4 //!
5 5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 7 //! the nodes representing the files and directories at the root of the
8 8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 10 //! nodes (if any) for nested files and directories.
11 11
12 12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
13 13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 14 use crate::errors::HgError;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateParents;
19 use crate::EntryState;
19 20 use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
20 21 use bytes_cast::BytesCast;
21 22 use std::borrow::Cow;
22 23 use std::convert::{TryFrom, TryInto};
23 24
24 25 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 26 /// This a redundant sanity check more than an actual "magic number" since
26 27 /// `.hg/requires` already governs which format should be used.
27 28 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28 29
29 30 #[derive(BytesCast)]
30 31 #[repr(C)]
31 32 struct Header {
32 33 marker: [u8; V2_FORMAT_MARKER.len()],
33 34
34 35 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
35 36 /// `parents` field being at this offset, immediately after `marker`.
36 37 parents: DirstateParents,
37 38
38 39 root: ChildNodes,
39 40 nodes_with_entry_count: Size,
40 41 nodes_with_copy_source_count: Size,
41 42 }
42 43
43 44 #[derive(BytesCast)]
44 45 #[repr(C)]
45 struct Node {
46 pub(super) struct Node {
46 47 full_path: PathSlice,
47 48
48 49 /// In bytes from `self.full_path.start`
49 50 base_name_start: Size,
50 51
51 52 copy_source: OptPathSlice,
52 53 entry: OptEntry,
53 54 children: ChildNodes,
54 tracked_descendants_count: Size,
55 pub(super) tracked_descendants_count: Size,
55 56 }
56 57
57 58 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
58 59 /// format
59 #[derive(BytesCast)]
60 #[derive(BytesCast, Copy, Clone)]
60 61 #[repr(C)]
61 62 struct OptEntry {
62 63 state: u8,
63 64 mode: I32Be,
64 65 mtime: I32Be,
65 66 size: I32Be,
66 67 }
67 68
68 69 /// Counted in bytes from the start of the file
69 70 ///
70 71 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
71 72 /// we could save space by using `U32Be` instead.
72 73 type Offset = U64Be;
73 74
74 75 /// Counted in number of items
75 76 ///
76 77 /// NOTE: not supporting directories with more than 4 billion direct children,
77 78 /// or filenames more than 4 GiB.
78 79 type Size = U32Be;
79 80
80 81 /// Location of consecutive, fixed-size items.
81 82 ///
82 83 /// An item can be a single byte for paths, or a struct with
83 84 /// `derive(BytesCast)`.
84 85 #[derive(BytesCast, Copy, Clone)]
85 86 #[repr(C)]
86 87 struct Slice {
87 88 start: Offset,
88 89 len: Size,
89 90 }
90 91
91 92 /// A contiguous sequence of `len` times `Node`, representing the child nodes
92 93 /// of either some other node or of the repository root.
93 94 ///
94 95 /// Always sorted by ascending `full_path`, to allow binary search.
95 96 /// Since nodes with the same parent nodes also have the same parent path,
96 97 /// only the `base_name`s need to be compared during binary search.
97 98 type ChildNodes = Slice;
98 99
99 100 /// A `HgPath` of `len` bytes
100 101 type PathSlice = Slice;
101 102
102 103 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
103 104 type OptPathSlice = Slice;
104 105
105 106 /// Make sure that size-affecting changes are made knowingly
106 107 fn _static_assert_size_of() {
107 108 let _ = std::mem::transmute::<Header, [u8; 72]>;
108 109 let _ = std::mem::transmute::<Node, [u8; 57]>;
109 110 }
110 111
111 112 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
112 113 ///
113 114 /// This should only happen if Mercurial is buggy or a repository is corrupted.
114 115 #[derive(Debug)]
115 116 pub struct DirstateV2ParseError;
116 117
117 118 impl From<DirstateV2ParseError> for HgError {
118 119 fn from(_: DirstateV2ParseError) -> Self {
119 120 HgError::corrupted("dirstate-v2 parse error")
120 121 }
121 122 }
122 123
123 124 impl From<DirstateV2ParseError> for crate::DirstateError {
124 125 fn from(error: DirstateV2ParseError) -> Self {
125 126 HgError::from(error).into()
126 127 }
127 128 }
128 129
129 130 pub(super) fn read<'on_disk>(
130 131 on_disk: &'on_disk [u8],
131 132 ) -> Result<
132 133 (DirstateMap<'on_disk>, Option<DirstateParents>),
133 134 DirstateV2ParseError,
134 135 > {
135 136 if on_disk.is_empty() {
136 137 return Ok((DirstateMap::empty(on_disk), None));
137 138 }
138 139 let (header, _) =
139 140 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
140 141 let Header {
141 142 marker,
142 143 parents,
143 144 root,
144 145 nodes_with_entry_count,
145 146 nodes_with_copy_source_count,
146 147 } = header;
147 148 if marker != V2_FORMAT_MARKER {
148 149 return Err(DirstateV2ParseError);
149 150 }
150 151 let dirstate_map = DirstateMap {
151 152 on_disk,
152 root: read_nodes(on_disk, *root)?,
153 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
154 on_disk, *root,
155 )?),
153 156 nodes_with_entry_count: nodes_with_entry_count.get(),
154 157 nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
155 158 };
156 159 let parents = Some(parents.clone());
157 160 Ok((dirstate_map, parents))
158 161 }
159 162
160 163 impl Node {
161 pub(super) fn path<'on_disk>(
164 pub(super) fn full_path<'on_disk>(
162 165 &self,
163 166 on_disk: &'on_disk [u8],
164 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
165 let full_path = read_hg_path(on_disk, self.full_path)?;
166 let base_name_start = usize::try_from(self.base_name_start.get())
167 // u32 -> usize, could only panic on a 16-bit CPU
168 .expect("dirstate-v2 base_name_start out of bounds");
169 if base_name_start < full_path.len() {
170 Ok(WithBasename::from_raw_parts(full_path, base_name_start))
167 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
168 read_hg_path(on_disk, self.full_path)
169 }
170
171 pub(super) fn base_name_start<'on_disk>(
172 &self,
173 ) -> Result<usize, DirstateV2ParseError> {
174 let start = self.base_name_start.get();
175 if start < self.full_path.len.get() {
176 let start = usize::try_from(start)
177 // u32 -> usize, could only panic on a 16-bit CPU
178 .expect("dirstate-v2 base_name_start out of bounds");
179 Ok(start)
171 180 } else {
172 181 Err(DirstateV2ParseError)
173 182 }
174 183 }
175 184
185 pub(super) fn base_name<'on_disk>(
186 &self,
187 on_disk: &'on_disk [u8],
188 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
189 let full_path = self.full_path(on_disk)?;
190 let base_name_start = self.base_name_start()?;
191 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
192 }
193
194 pub(super) fn path<'on_disk>(
195 &self,
196 on_disk: &'on_disk [u8],
197 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
198 Ok(WithBasename::from_raw_parts(
199 Cow::Borrowed(self.full_path(on_disk)?),
200 self.base_name_start()?,
201 ))
202 }
203
204 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
205 self.copy_source.start.get() != 0
206 }
207
176 208 pub(super) fn copy_source<'on_disk>(
177 209 &self,
178 210 on_disk: &'on_disk [u8],
179 ) -> Result<Option<Cow<'on_disk, HgPath>>, DirstateV2ParseError> {
180 Ok(if self.copy_source.start.get() != 0 {
211 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
212 Ok(if self.has_copy_source() {
181 213 Some(read_hg_path(on_disk, self.copy_source)?)
182 214 } else {
183 215 None
184 216 })
185 217 }
186 218
219 pub(super) fn has_entry(&self) -> bool {
220 self.entry.state != b'\0'
221 }
222
223 pub(super) fn state(
224 &self,
225 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
226 Ok(if self.has_entry() {
227 Some(
228 self.entry
229 .state
230 .try_into()
231 .map_err(|_| DirstateV2ParseError)?,
232 )
233 } else {
234 None
235 })
236 }
237
187 238 pub(super) fn entry(
188 239 &self,
189 240 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
190 Ok(if self.entry.state != b'\0' {
191 Some(DirstateEntry {
192 state: self
193 .entry
194 .state
195 .try_into()
196 .map_err(|_| DirstateV2ParseError)?,
197 mode: self.entry.mode.get(),
198 mtime: self.entry.mtime.get(),
199 size: self.entry.size.get(),
200 })
201 } else {
202 None
203 })
241 Ok(self.state()?.map(|state| DirstateEntry {
242 state,
243 mode: self.entry.mode.get(),
244 mtime: self.entry.mtime.get(),
245 size: self.entry.size.get(),
246 }))
247 }
248
249 pub(super) fn children<'on_disk>(
250 &self,
251 on_disk: &'on_disk [u8],
252 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
253 read_slice::<Node>(on_disk, self.children)
204 254 }
205 255
206 256 pub(super) fn to_in_memory_node<'on_disk>(
207 257 &self,
208 258 on_disk: &'on_disk [u8],
209 259 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
210 260 Ok(dirstate_map::Node {
211 children: read_nodes(on_disk, self.children)?,
212 copy_source: self.copy_source(on_disk)?,
261 children: dirstate_map::ChildNodes::OnDisk(
262 self.children(on_disk)?,
263 ),
264 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
213 265 entry: self.entry()?,
214 266 tracked_descendants_count: self.tracked_descendants_count.get(),
215 267 })
216 268 }
217 269 }
218 270
219 fn read_nodes(
220 on_disk: &[u8],
221 slice: ChildNodes,
222 ) -> Result<dirstate_map::ChildNodes, DirstateV2ParseError> {
223 read_slice::<Node>(on_disk, slice)?
224 .iter()
225 .map(|node| {
226 Ok((node.path(on_disk)?, node.to_in_memory_node(on_disk)?))
227 })
228 .collect::<Result<_, _>>()
229 .map(dirstate_map::ChildNodes::InMemory)
230 }
231
232 271 fn read_hg_path(
233 272 on_disk: &[u8],
234 273 slice: Slice,
235 ) -> Result<Cow<HgPath>, DirstateV2ParseError> {
274 ) -> Result<&HgPath, DirstateV2ParseError> {
236 275 let bytes = read_slice::<u8>(on_disk, slice)?;
237 Ok(Cow::Borrowed(HgPath::new(bytes)))
276 Ok(HgPath::new(bytes))
238 277 }
239 278
240 279 fn read_slice<T>(
241 280 on_disk: &[u8],
242 281 slice: Slice,
243 282 ) -> Result<&[T], DirstateV2ParseError>
244 283 where
245 284 T: BytesCast,
246 285 {
247 286 // Either `usize::MAX` would result in "out of bounds" error since a single
248 287 // `&[u8]` cannot occupy the entire addess space.
249 288 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
250 289 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
251 290 on_disk
252 291 .get(start..)
253 292 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
254 293 .map(|(slice, _rest)| slice)
255 294 .ok_or_else(|| DirstateV2ParseError)
256 295 }
257 296
258 297 pub(super) fn write(
259 298 dirstate_map: &mut DirstateMap,
260 299 parents: DirstateParents,
261 300 ) -> Result<Vec<u8>, DirstateError> {
262 301 let header_len = std::mem::size_of::<Header>();
263 302
264 303 // This ignores the space for paths, and for nodes without an entry.
265 304 // TODO: better estimate? Skip the `Vec` and write to a file directly?
266 305 let size_guess = header_len
267 306 + std::mem::size_of::<Node>()
268 307 * dirstate_map.nodes_with_entry_count as usize;
269 308 let mut out = Vec::with_capacity(size_guess);
270 309
271 310 // Keep space for the header. We’ll fill it out at the end when we know the
272 311 // actual offset for the root nodes.
273 312 out.resize(header_len, 0_u8);
274 313
275 314 let root =
276 315 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
277 316
278 317 let header = Header {
279 318 marker: *V2_FORMAT_MARKER,
280 319 parents: parents,
281 320 root,
282 321 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
283 322 nodes_with_copy_source_count: dirstate_map
284 323 .nodes_with_copy_source_count
285 324 .into(),
286 325 };
287 326 out[..header_len].copy_from_slice(header.as_bytes());
288 327 Ok(out)
289 328 }
290 329
291 330 fn write_nodes(
292 331 dirstate_map: &DirstateMap,
293 332 nodes: dirstate_map::ChildNodesRef,
294 333 out: &mut Vec<u8>,
295 334 ) -> Result<ChildNodes, DirstateError> {
296 335 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
297 336 // order. Sort to enable binary search in the written file.
298 337 let nodes = nodes.sorted();
299 338
300 339 // First accumulate serialized nodes in a `Vec`
301 340 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
302 341 for node in nodes {
303 let children = node.children(dirstate_map.on_disk)?;
304 let children = write_nodes(dirstate_map, children, out)?;
342 let children = write_nodes(
343 dirstate_map,
344 node.children(dirstate_map.on_disk)?,
345 out,
346 )?;
305 347 let full_path = node.full_path(dirstate_map.on_disk)?;
306 348 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
307 349 let copy_source =
308 350 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
309 351 write_slice::<u8>(source.as_bytes(), out)
310 352 } else {
311 353 Slice {
312 354 start: 0.into(),
313 355 len: 0.into(),
314 356 }
315 357 };
316 358 on_disk_nodes.push(match node {
317 359 NodeRef::InMemory(path, node) => Node {
318 360 children,
319 361 copy_source,
320 362 full_path,
321 363 base_name_start: u32::try_from(path.base_name_start())
322 364 // Could only panic for paths over 4 GiB
323 365 .expect("dirstate-v2 offset overflow")
324 366 .into(),
325 367 tracked_descendants_count: node
326 368 .tracked_descendants_count
327 369 .into(),
328 370 entry: if let Some(entry) = &node.entry {
329 371 OptEntry {
330 372 state: entry.state.into(),
331 373 mode: entry.mode.into(),
332 374 mtime: entry.mtime.into(),
333 375 size: entry.size.into(),
334 376 }
335 377 } else {
336 378 OptEntry {
337 379 state: b'\0',
338 380 mode: 0.into(),
339 381 mtime: 0.into(),
340 382 size: 0.into(),
341 383 }
342 384 },
343 385 },
386 NodeRef::OnDisk(node) => Node {
387 children,
388 copy_source,
389 full_path,
390 ..*node
391 },
344 392 })
345 393 }
346 394 // … so we can write them contiguously
347 395 Ok(write_slice::<Node>(&on_disk_nodes, out))
348 396 }
349 397
350 398 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
351 399 where
352 400 T: BytesCast,
353 401 {
354 402 let start = u64::try_from(out.len())
355 403 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
356 404 .expect("dirstate-v2 offset overflow")
357 405 .into();
358 406 let len = u32::try_from(slice.len())
359 407 // Could only panic for paths over 4 GiB or nodes with over 4 billions
360 408 // child nodes
361 409 .expect("dirstate-v2 offset overflow")
362 410 .into();
363 411 out.extend(slice.as_bytes());
364 412 Slice { start, len }
365 413 }
General Comments 0
You need to be logged in to leave comments. Login now