##// END OF EJS Templates
rust-dirstate: panic if the DirstateMap counters go below 0...
Raphaël Gomès -
r49865:2593873c stable
parent child Browse files
Show More
@@ -1,1180 +1,1196 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::path::PathBuf;
5 5
6 6 use super::on_disk;
7 7 use super::on_disk::DirstateV2ParseError;
8 8 use super::owning::OwningDirstateMap;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::CopyMapIter;
14 14 use crate::dirstate::StateMapIter;
15 15 use crate::dirstate::TruncatedTimestamp;
16 16 use crate::dirstate::SIZE_FROM_OTHER_PARENT;
17 17 use crate::dirstate::SIZE_NON_NORMAL;
18 18 use crate::matchers::Matcher;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::DirstateEntry;
21 21 use crate::DirstateError;
22 22 use crate::DirstateParents;
23 23 use crate::DirstateStatus;
24 24 use crate::EntryState;
25 25 use crate::FastHashMap;
26 26 use crate::PatternFileWarning;
27 27 use crate::StatusError;
28 28 use crate::StatusOptions;
29 29
30 30 /// Append to an existing data file if the amount of unreachable data (not used
31 31 /// anymore) is less than this fraction of the total amount of existing data.
32 32 const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
33 33
34 34 pub struct DirstateMap<'on_disk> {
35 35 /// Contents of the `.hg/dirstate` file
36 36 pub(super) on_disk: &'on_disk [u8],
37 37
38 38 pub(super) root: ChildNodes<'on_disk>,
39 39
40 40 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
41 41 pub(super) nodes_with_entry_count: u32,
42 42
43 43 /// Number of nodes anywhere in the tree that have
44 44 /// `.copy_source.is_some()`.
45 45 pub(super) nodes_with_copy_source_count: u32,
46 46
47 47 /// See on_disk::Header
48 48 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
49 49
50 50 /// How many bytes of `on_disk` are not used anymore
51 51 pub(super) unreachable_bytes: u32,
52 52 }
53 53
54 54 /// Using a plain `HgPathBuf` of the full path from the repository root as a
55 55 /// map key would also work: all paths in a given map have the same parent
56 56 /// path, so comparing full paths gives the same result as comparing base
57 57 /// names. However `HashMap` would waste time always re-hashing the same
58 58 /// string prefix.
59 59 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
60 60
61 61 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
62 62 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
63 63 pub(super) enum BorrowedPath<'tree, 'on_disk> {
64 64 InMemory(&'tree HgPathBuf),
65 65 OnDisk(&'on_disk HgPath),
66 66 }
67 67
68 68 pub(super) enum ChildNodes<'on_disk> {
69 69 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
70 70 OnDisk(&'on_disk [on_disk::Node]),
71 71 }
72 72
73 73 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
74 74 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
75 75 OnDisk(&'on_disk [on_disk::Node]),
76 76 }
77 77
78 78 pub(super) enum NodeRef<'tree, 'on_disk> {
79 79 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
80 80 OnDisk(&'on_disk on_disk::Node),
81 81 }
82 82
83 83 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
84 84 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
85 85 match *self {
86 86 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
87 87 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
88 88 }
89 89 }
90 90 }
91 91
92 92 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
93 93 type Target = HgPath;
94 94
95 95 fn deref(&self) -> &HgPath {
96 96 match *self {
97 97 BorrowedPath::InMemory(in_memory) => in_memory,
98 98 BorrowedPath::OnDisk(on_disk) => on_disk,
99 99 }
100 100 }
101 101 }
102 102
103 103 impl Default for ChildNodes<'_> {
104 104 fn default() -> Self {
105 105 ChildNodes::InMemory(Default::default())
106 106 }
107 107 }
108 108
109 109 impl<'on_disk> ChildNodes<'on_disk> {
110 110 pub(super) fn as_ref<'tree>(
111 111 &'tree self,
112 112 ) -> ChildNodesRef<'tree, 'on_disk> {
113 113 match self {
114 114 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
115 115 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
116 116 }
117 117 }
118 118
119 119 pub(super) fn is_empty(&self) -> bool {
120 120 match self {
121 121 ChildNodes::InMemory(nodes) => nodes.is_empty(),
122 122 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
123 123 }
124 124 }
125 125
126 126 fn make_mut(
127 127 &mut self,
128 128 on_disk: &'on_disk [u8],
129 129 unreachable_bytes: &mut u32,
130 130 ) -> Result<
131 131 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
132 132 DirstateV2ParseError,
133 133 > {
134 134 match self {
135 135 ChildNodes::InMemory(nodes) => Ok(nodes),
136 136 ChildNodes::OnDisk(nodes) => {
137 137 *unreachable_bytes +=
138 138 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
139 139 let nodes = nodes
140 140 .iter()
141 141 .map(|node| {
142 142 Ok((
143 143 node.path(on_disk)?,
144 144 node.to_in_memory_node(on_disk)?,
145 145 ))
146 146 })
147 147 .collect::<Result<_, _>>()?;
148 148 *self = ChildNodes::InMemory(nodes);
149 149 match self {
150 150 ChildNodes::InMemory(nodes) => Ok(nodes),
151 151 ChildNodes::OnDisk(_) => unreachable!(),
152 152 }
153 153 }
154 154 }
155 155 }
156 156 }
157 157
158 158 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
159 159 pub(super) fn get(
160 160 &self,
161 161 base_name: &HgPath,
162 162 on_disk: &'on_disk [u8],
163 163 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
164 164 match self {
165 165 ChildNodesRef::InMemory(nodes) => Ok(nodes
166 166 .get_key_value(base_name)
167 167 .map(|(k, v)| NodeRef::InMemory(k, v))),
168 168 ChildNodesRef::OnDisk(nodes) => {
169 169 let mut parse_result = Ok(());
170 170 let search_result = nodes.binary_search_by(|node| {
171 171 match node.base_name(on_disk) {
172 172 Ok(node_base_name) => node_base_name.cmp(base_name),
173 173 Err(e) => {
174 174 parse_result = Err(e);
175 175 // Dummy comparison result, `search_result` won’t
176 176 // be used since `parse_result` is an error
177 177 std::cmp::Ordering::Equal
178 178 }
179 179 }
180 180 });
181 181 parse_result.map(|()| {
182 182 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
183 183 })
184 184 }
185 185 }
186 186 }
187 187
188 188 /// Iterate in undefined order
189 189 pub(super) fn iter(
190 190 &self,
191 191 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
192 192 match self {
193 193 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
194 194 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
195 195 ),
196 196 ChildNodesRef::OnDisk(nodes) => {
197 197 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
198 198 }
199 199 }
200 200 }
201 201
202 202 /// Iterate in parallel in undefined order
203 203 pub(super) fn par_iter(
204 204 &self,
205 205 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
206 206 {
207 207 use rayon::prelude::*;
208 208 match self {
209 209 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
210 210 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
211 211 ),
212 212 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
213 213 nodes.par_iter().map(NodeRef::OnDisk),
214 214 ),
215 215 }
216 216 }
217 217
218 218 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
219 219 match self {
220 220 ChildNodesRef::InMemory(nodes) => {
221 221 let mut vec: Vec<_> = nodes
222 222 .iter()
223 223 .map(|(k, v)| NodeRef::InMemory(k, v))
224 224 .collect();
225 225 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
226 226 match node {
227 227 NodeRef::InMemory(path, _node) => path.base_name(),
228 228 NodeRef::OnDisk(_) => unreachable!(),
229 229 }
230 230 }
231 231 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
232 232 // value: https://github.com/rust-lang/rust/issues/34162
233 233 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
234 234 vec
235 235 }
236 236 ChildNodesRef::OnDisk(nodes) => {
237 237 // Nodes on disk are already sorted
238 238 nodes.iter().map(NodeRef::OnDisk).collect()
239 239 }
240 240 }
241 241 }
242 242 }
243 243
244 244 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
245 245 pub(super) fn full_path(
246 246 &self,
247 247 on_disk: &'on_disk [u8],
248 248 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
249 249 match self {
250 250 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
251 251 NodeRef::OnDisk(node) => node.full_path(on_disk),
252 252 }
253 253 }
254 254
255 255 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
256 256 /// HgPath>` detached from `'tree`
257 257 pub(super) fn full_path_borrowed(
258 258 &self,
259 259 on_disk: &'on_disk [u8],
260 260 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
261 261 match self {
262 262 NodeRef::InMemory(path, _node) => match path.full_path() {
263 263 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
264 264 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
265 265 },
266 266 NodeRef::OnDisk(node) => {
267 267 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
268 268 }
269 269 }
270 270 }
271 271
272 272 pub(super) fn base_name(
273 273 &self,
274 274 on_disk: &'on_disk [u8],
275 275 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
276 276 match self {
277 277 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
278 278 NodeRef::OnDisk(node) => node.base_name(on_disk),
279 279 }
280 280 }
281 281
282 282 pub(super) fn children(
283 283 &self,
284 284 on_disk: &'on_disk [u8],
285 285 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
286 286 match self {
287 287 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
288 288 NodeRef::OnDisk(node) => {
289 289 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
290 290 }
291 291 }
292 292 }
293 293
294 294 pub(super) fn has_copy_source(&self) -> bool {
295 295 match self {
296 296 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
297 297 NodeRef::OnDisk(node) => node.has_copy_source(),
298 298 }
299 299 }
300 300
301 301 pub(super) fn copy_source(
302 302 &self,
303 303 on_disk: &'on_disk [u8],
304 304 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
305 305 match self {
306 306 NodeRef::InMemory(_path, node) => {
307 307 Ok(node.copy_source.as_ref().map(|s| &**s))
308 308 }
309 309 NodeRef::OnDisk(node) => node.copy_source(on_disk),
310 310 }
311 311 }
312 312 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
313 313 /// HgPath>` detached from `'tree`
314 314 pub(super) fn copy_source_borrowed(
315 315 &self,
316 316 on_disk: &'on_disk [u8],
317 317 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
318 318 {
319 319 Ok(match self {
320 320 NodeRef::InMemory(_path, node) => {
321 321 node.copy_source.as_ref().map(|source| match source {
322 322 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
323 323 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
324 324 })
325 325 }
326 326 NodeRef::OnDisk(node) => node
327 327 .copy_source(on_disk)?
328 328 .map(|source| BorrowedPath::OnDisk(source)),
329 329 })
330 330 }
331 331
332 332 pub(super) fn entry(
333 333 &self,
334 334 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
335 335 match self {
336 336 NodeRef::InMemory(_path, node) => {
337 337 Ok(node.data.as_entry().copied())
338 338 }
339 339 NodeRef::OnDisk(node) => node.entry(),
340 340 }
341 341 }
342 342
343 343 pub(super) fn state(
344 344 &self,
345 345 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
346 346 Ok(self.entry()?.map(|e| e.state()))
347 347 }
348 348
349 349 pub(super) fn cached_directory_mtime(
350 350 &self,
351 351 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
352 352 match self {
353 353 NodeRef::InMemory(_path, node) => Ok(match node.data {
354 354 NodeData::CachedDirectory { mtime } => Some(mtime),
355 355 _ => None,
356 356 }),
357 357 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
358 358 }
359 359 }
360 360
361 361 pub(super) fn descendants_with_entry_count(&self) -> u32 {
362 362 match self {
363 363 NodeRef::InMemory(_path, node) => {
364 364 node.descendants_with_entry_count
365 365 }
366 366 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
367 367 }
368 368 }
369 369
370 370 pub(super) fn tracked_descendants_count(&self) -> u32 {
371 371 match self {
372 372 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
373 373 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
374 374 }
375 375 }
376 376 }
377 377
378 378 /// Represents a file or a directory
379 379 #[derive(Default)]
380 380 pub(super) struct Node<'on_disk> {
381 381 pub(super) data: NodeData,
382 382
383 383 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
384 384
385 385 pub(super) children: ChildNodes<'on_disk>,
386 386
387 387 /// How many (non-inclusive) descendants of this node have an entry.
388 388 pub(super) descendants_with_entry_count: u32,
389 389
390 390 /// How many (non-inclusive) descendants of this node have an entry whose
391 391 /// state is "tracked".
392 392 pub(super) tracked_descendants_count: u32,
393 393 }
394 394
395 395 pub(super) enum NodeData {
396 396 Entry(DirstateEntry),
397 397 CachedDirectory { mtime: TruncatedTimestamp },
398 398 None,
399 399 }
400 400
401 401 impl Default for NodeData {
402 402 fn default() -> Self {
403 403 NodeData::None
404 404 }
405 405 }
406 406
407 407 impl NodeData {
408 408 fn has_entry(&self) -> bool {
409 409 match self {
410 410 NodeData::Entry(_) => true,
411 411 _ => false,
412 412 }
413 413 }
414 414
415 415 fn as_entry(&self) -> Option<&DirstateEntry> {
416 416 match self {
417 417 NodeData::Entry(entry) => Some(entry),
418 418 _ => None,
419 419 }
420 420 }
421 421 }
422 422
423 423 impl<'on_disk> DirstateMap<'on_disk> {
424 424 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
425 425 Self {
426 426 on_disk,
427 427 root: ChildNodes::default(),
428 428 nodes_with_entry_count: 0,
429 429 nodes_with_copy_source_count: 0,
430 430 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
431 431 unreachable_bytes: 0,
432 432 }
433 433 }
434 434
435 435 #[timed]
436 436 pub fn new_v2(
437 437 on_disk: &'on_disk [u8],
438 438 data_size: usize,
439 439 metadata: &[u8],
440 440 ) -> Result<Self, DirstateError> {
441 441 if let Some(data) = on_disk.get(..data_size) {
442 442 Ok(on_disk::read(data, metadata)?)
443 443 } else {
444 444 Err(DirstateV2ParseError.into())
445 445 }
446 446 }
447 447
448 448 #[timed]
449 449 pub fn new_v1(
450 450 on_disk: &'on_disk [u8],
451 451 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
452 452 let mut map = Self::empty(on_disk);
453 453 if map.on_disk.is_empty() {
454 454 return Ok((map, None));
455 455 }
456 456
457 457 let parents = parse_dirstate_entries(
458 458 map.on_disk,
459 459 |path, entry, copy_source| {
460 460 let tracked = entry.state().is_tracked();
461 461 let node = Self::get_or_insert_node(
462 462 map.on_disk,
463 463 &mut map.unreachable_bytes,
464 464 &mut map.root,
465 465 path,
466 466 WithBasename::to_cow_borrowed,
467 467 |ancestor| {
468 468 if tracked {
469 469 ancestor.tracked_descendants_count += 1
470 470 }
471 471 ancestor.descendants_with_entry_count += 1
472 472 },
473 473 )?;
474 474 assert!(
475 475 !node.data.has_entry(),
476 476 "duplicate dirstate entry in read"
477 477 );
478 478 assert!(
479 479 node.copy_source.is_none(),
480 480 "duplicate dirstate entry in read"
481 481 );
482 482 node.data = NodeData::Entry(*entry);
483 483 node.copy_source = copy_source.map(Cow::Borrowed);
484 484 map.nodes_with_entry_count += 1;
485 485 if copy_source.is_some() {
486 486 map.nodes_with_copy_source_count += 1
487 487 }
488 488 Ok(())
489 489 },
490 490 )?;
491 491 let parents = Some(parents.clone());
492 492
493 493 Ok((map, parents))
494 494 }
495 495
496 496 /// Assuming dirstate-v2 format, returns whether the next write should
497 497 /// append to the existing data file that contains `self.on_disk` (true),
498 498 /// or create a new data file from scratch (false).
499 499 pub(super) fn write_should_append(&self) -> bool {
500 500 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
501 501 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
502 502 }
503 503
504 504 fn get_node<'tree>(
505 505 &'tree self,
506 506 path: &HgPath,
507 507 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
508 508 let mut children = self.root.as_ref();
509 509 let mut components = path.components();
510 510 let mut component =
511 511 components.next().expect("expected at least one components");
512 512 loop {
513 513 if let Some(child) = children.get(component, self.on_disk)? {
514 514 if let Some(next_component) = components.next() {
515 515 component = next_component;
516 516 children = child.children(self.on_disk)?;
517 517 } else {
518 518 return Ok(Some(child));
519 519 }
520 520 } else {
521 521 return Ok(None);
522 522 }
523 523 }
524 524 }
525 525
526 526 /// Returns a mutable reference to the node at `path` if it exists
527 527 ///
528 528 /// This takes `root` instead of `&mut self` so that callers can mutate
529 529 /// other fields while the returned borrow is still valid
530 530 fn get_node_mut<'tree>(
531 531 on_disk: &'on_disk [u8],
532 532 unreachable_bytes: &mut u32,
533 533 root: &'tree mut ChildNodes<'on_disk>,
534 534 path: &HgPath,
535 535 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
536 536 let mut children = root;
537 537 let mut components = path.components();
538 538 let mut component =
539 539 components.next().expect("expected at least one components");
540 540 loop {
541 541 if let Some(child) = children
542 542 .make_mut(on_disk, unreachable_bytes)?
543 543 .get_mut(component)
544 544 {
545 545 if let Some(next_component) = components.next() {
546 546 component = next_component;
547 547 children = &mut child.children;
548 548 } else {
549 549 return Ok(Some(child));
550 550 }
551 551 } else {
552 552 return Ok(None);
553 553 }
554 554 }
555 555 }
556 556
557 557 pub(super) fn get_or_insert<'tree, 'path>(
558 558 &'tree mut self,
559 559 path: &HgPath,
560 560 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
561 561 Self::get_or_insert_node(
562 562 self.on_disk,
563 563 &mut self.unreachable_bytes,
564 564 &mut self.root,
565 565 path,
566 566 WithBasename::to_cow_owned,
567 567 |_| {},
568 568 )
569 569 }
570 570
571 571 fn get_or_insert_node<'tree, 'path>(
572 572 on_disk: &'on_disk [u8],
573 573 unreachable_bytes: &mut u32,
574 574 root: &'tree mut ChildNodes<'on_disk>,
575 575 path: &'path HgPath,
576 576 to_cow: impl Fn(
577 577 WithBasename<&'path HgPath>,
578 578 ) -> WithBasename<Cow<'on_disk, HgPath>>,
579 579 mut each_ancestor: impl FnMut(&mut Node),
580 580 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
581 581 let mut child_nodes = root;
582 582 let mut inclusive_ancestor_paths =
583 583 WithBasename::inclusive_ancestors_of(path);
584 584 let mut ancestor_path = inclusive_ancestor_paths
585 585 .next()
586 586 .expect("expected at least one inclusive ancestor");
587 587 loop {
588 588 // TODO: can we avoid allocating an owned key in cases where the
589 589 // map already contains that key, without introducing double
590 590 // lookup?
591 591 let child_node = child_nodes
592 592 .make_mut(on_disk, unreachable_bytes)?
593 593 .entry(to_cow(ancestor_path))
594 594 .or_default();
595 595 if let Some(next) = inclusive_ancestor_paths.next() {
596 596 each_ancestor(child_node);
597 597 ancestor_path = next;
598 598 child_nodes = &mut child_node.children;
599 599 } else {
600 600 return Ok(child_node);
601 601 }
602 602 }
603 603 }
604 604
605 605 fn add_or_remove_file(
606 606 &mut self,
607 607 path: &HgPath,
608 608 old_state: Option<EntryState>,
609 609 new_entry: DirstateEntry,
610 610 ) -> Result<(), DirstateV2ParseError> {
611 611 let had_entry = old_state.is_some();
612 612 let was_tracked = old_state.map_or(false, |s| s.is_tracked());
613 613 let tracked_count_increment =
614 614 match (was_tracked, new_entry.state().is_tracked()) {
615 615 (false, true) => 1,
616 616 (true, false) => -1,
617 617 _ => 0,
618 618 };
619 619
620 620 let node = Self::get_or_insert_node(
621 621 self.on_disk,
622 622 &mut self.unreachable_bytes,
623 623 &mut self.root,
624 624 path,
625 625 WithBasename::to_cow_owned,
626 626 |ancestor| {
627 627 if !had_entry {
628 628 ancestor.descendants_with_entry_count += 1;
629 629 }
630 630
631 631 // We can’t use `+= increment` because the counter is unsigned,
632 632 // and we want debug builds to detect accidental underflow
633 633 // through zero
634 634 match tracked_count_increment {
635 635 1 => ancestor.tracked_descendants_count += 1,
636 636 -1 => ancestor.tracked_descendants_count -= 1,
637 637 _ => {}
638 638 }
639 639 },
640 640 )?;
641 641 if !had_entry {
642 642 self.nodes_with_entry_count += 1
643 643 }
644 644 node.data = NodeData::Entry(new_entry);
645 645 Ok(())
646 646 }
647 647
648 648 fn iter_nodes<'tree>(
649 649 &'tree self,
650 650 ) -> impl Iterator<
651 651 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
652 652 > + 'tree {
653 653 // Depth first tree traversal.
654 654 //
655 655 // If we could afford internal iteration and recursion,
656 656 // this would look like:
657 657 //
658 658 // ```
659 659 // fn traverse_children(
660 660 // children: &ChildNodes,
661 661 // each: &mut impl FnMut(&Node),
662 662 // ) {
663 663 // for child in children.values() {
664 664 // traverse_children(&child.children, each);
665 665 // each(child);
666 666 // }
667 667 // }
668 668 // ```
669 669 //
670 670 // However we want an external iterator and therefore can’t use the
671 671 // call stack. Use an explicit stack instead:
672 672 let mut stack = Vec::new();
673 673 let mut iter = self.root.as_ref().iter();
674 674 std::iter::from_fn(move || {
675 675 while let Some(child_node) = iter.next() {
676 676 let children = match child_node.children(self.on_disk) {
677 677 Ok(children) => children,
678 678 Err(error) => return Some(Err(error)),
679 679 };
680 680 // Pseudo-recursion
681 681 let new_iter = children.iter();
682 682 let old_iter = std::mem::replace(&mut iter, new_iter);
683 683 stack.push((child_node, old_iter));
684 684 }
685 685 // Found the end of a `children.iter()` iterator.
686 686 if let Some((child_node, next_iter)) = stack.pop() {
687 687 // "Return" from pseudo-recursion by restoring state from the
688 688 // explicit stack
689 689 iter = next_iter;
690 690
691 691 Some(Ok(child_node))
692 692 } else {
693 693 // Reached the bottom of the stack, we’re done
694 694 None
695 695 }
696 696 })
697 697 }
698 698
699 699 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
700 700 if let Cow::Borrowed(path) = path {
701 701 *unreachable_bytes += path.len() as u32
702 702 }
703 703 }
704 704 }
705 705
706 706 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
707 707 ///
708 708 /// The callback is only called for incoming `Ok` values. Errors are passed
709 709 /// through as-is. In order to let it use the `?` operator the callback is
710 710 /// expected to return a `Result` of `Option`, instead of an `Option` of
711 711 /// `Result`.
712 712 fn filter_map_results<'a, I, F, A, B, E>(
713 713 iter: I,
714 714 f: F,
715 715 ) -> impl Iterator<Item = Result<B, E>> + 'a
716 716 where
717 717 I: Iterator<Item = Result<A, E>> + 'a,
718 718 F: Fn(A) -> Result<Option<B>, E> + 'a,
719 719 {
720 720 iter.filter_map(move |result| match result {
721 721 Ok(node) => f(node).transpose(),
722 722 Err(e) => Some(Err(e)),
723 723 })
724 724 }
725 725
726 726 impl OwningDirstateMap {
727 727 pub fn clear(&mut self) {
728 728 self.with_dmap_mut(|map| {
729 729 map.root = Default::default();
730 730 map.nodes_with_entry_count = 0;
731 731 map.nodes_with_copy_source_count = 0;
732 732 });
733 733 }
734 734
735 735 pub fn set_entry(
736 736 &mut self,
737 737 filename: &HgPath,
738 738 entry: DirstateEntry,
739 739 ) -> Result<(), DirstateV2ParseError> {
740 740 self.with_dmap_mut(|map| {
741 741 map.get_or_insert(&filename)?.data = NodeData::Entry(entry);
742 742 Ok(())
743 743 })
744 744 }
745 745
746 746 pub fn add_file(
747 747 &mut self,
748 748 filename: &HgPath,
749 749 entry: DirstateEntry,
750 750 ) -> Result<(), DirstateError> {
751 751 let old_state = self.get(filename)?.map(|e| e.state());
752 752 self.with_dmap_mut(|map| {
753 753 Ok(map.add_or_remove_file(filename, old_state, entry)?)
754 754 })
755 755 }
756 756
757 757 pub fn remove_file(
758 758 &mut self,
759 759 filename: &HgPath,
760 760 in_merge: bool,
761 761 ) -> Result<(), DirstateError> {
762 762 let old_entry_opt = self.get(filename)?;
763 763 let old_state = old_entry_opt.map(|e| e.state());
764 764 let mut size = 0;
765 765 if in_merge {
766 766 // XXX we should not be able to have 'm' state and 'FROM_P2' if not
767 767 // during a merge. So I (marmoute) am not sure we need the
768 768 // conditionnal at all. Adding double checking this with assert
769 769 // would be nice.
770 770 if let Some(old_entry) = old_entry_opt {
771 771 // backup the previous state
772 772 if old_entry.state() == EntryState::Merged {
773 773 size = SIZE_NON_NORMAL;
774 774 } else if old_entry.state() == EntryState::Normal
775 775 && old_entry.size() == SIZE_FROM_OTHER_PARENT
776 776 {
777 777 // other parent
778 778 size = SIZE_FROM_OTHER_PARENT;
779 779 }
780 780 }
781 781 }
782 782 if size == 0 {
783 783 self.copy_map_remove(filename)?;
784 784 }
785 785 self.with_dmap_mut(|map| {
786 786 let entry = DirstateEntry::new_removed(size);
787 787 Ok(map.add_or_remove_file(filename, old_state, entry)?)
788 788 })
789 789 }
790 790
791 791 pub fn drop_entry_and_copy_source(
792 792 &mut self,
793 793 filename: &HgPath,
794 794 ) -> Result<(), DirstateError> {
795 795 let was_tracked = self
796 796 .get(filename)?
797 797 .map_or(false, |e| e.state().is_tracked());
798 798 struct Dropped {
799 799 was_tracked: bool,
800 800 had_entry: bool,
801 801 had_copy_source: bool,
802 802 }
803 803
804 804 /// If this returns `Ok(Some((dropped, removed)))`, then
805 805 ///
806 806 /// * `dropped` is about the leaf node that was at `filename`
807 807 /// * `removed` is whether this particular level of recursion just
808 808 /// removed a node in `nodes`.
809 809 fn recur<'on_disk>(
810 810 on_disk: &'on_disk [u8],
811 811 unreachable_bytes: &mut u32,
812 812 nodes: &mut ChildNodes<'on_disk>,
813 813 path: &HgPath,
814 814 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
815 815 let (first_path_component, rest_of_path) =
816 816 path.split_first_component();
817 817 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
818 818 let node = if let Some(node) = nodes.get_mut(first_path_component)
819 819 {
820 820 node
821 821 } else {
822 822 return Ok(None);
823 823 };
824 824 let dropped;
825 825 if let Some(rest) = rest_of_path {
826 826 if let Some((d, removed)) = recur(
827 827 on_disk,
828 828 unreachable_bytes,
829 829 &mut node.children,
830 830 rest,
831 831 )? {
832 832 dropped = d;
833 833 if dropped.had_entry {
834 node.descendants_with_entry_count -= 1;
834 node.descendants_with_entry_count = node
835 .descendants_with_entry_count
836 .checked_sub(1)
837 .expect(
838 "descendants_with_entry_count should be >= 0",
839 );
835 840 }
836 841 if dropped.was_tracked {
837 node.tracked_descendants_count -= 1;
842 node.tracked_descendants_count = node
843 .tracked_descendants_count
844 .checked_sub(1)
845 .expect(
846 "tracked_descendants_count should be >= 0",
847 );
838 848 }
839 849
840 850 // Directory caches must be invalidated when removing a
841 851 // child node
842 852 if removed {
843 853 if let NodeData::CachedDirectory { .. } = &node.data {
844 854 node.data = NodeData::None
845 855 }
846 856 }
847 857 } else {
848 858 return Ok(None);
849 859 }
850 860 } else {
851 861 let had_entry = node.data.has_entry();
852 862 if had_entry {
853 863 node.data = NodeData::None
854 864 }
855 865 if let Some(source) = &node.copy_source {
856 866 DirstateMap::count_dropped_path(unreachable_bytes, source);
857 867 node.copy_source = None
858 868 }
859 869 dropped = Dropped {
860 870 was_tracked: node
861 871 .data
862 872 .as_entry()
863 873 .map_or(false, |entry| entry.state().is_tracked()),
864 874 had_entry,
865 875 had_copy_source: node.copy_source.take().is_some(),
866 876 };
867 877 }
868 878 // After recursion, for both leaf (rest_of_path is None) nodes and
869 879 // parent nodes, remove a node if it just became empty.
870 880 let remove = !node.data.has_entry()
871 881 && node.copy_source.is_none()
872 882 && node.children.is_empty();
873 883 if remove {
874 884 let (key, _) =
875 885 nodes.remove_entry(first_path_component).unwrap();
876 886 DirstateMap::count_dropped_path(
877 887 unreachable_bytes,
878 888 key.full_path(),
879 889 )
880 890 }
881 891 Ok(Some((dropped, remove)))
882 892 }
883 893
884 894 self.with_dmap_mut(|map| {
885 895 if let Some((dropped, _removed)) = recur(
886 896 map.on_disk,
887 897 &mut map.unreachable_bytes,
888 898 &mut map.root,
889 899 filename,
890 900 )? {
891 901 if dropped.had_entry {
892 map.nodes_with_entry_count -= 1
902 map.nodes_with_entry_count = map
903 .nodes_with_entry_count
904 .checked_sub(1)
905 .expect("nodes_with_entry_count should be >= 0");
893 906 }
894 907 if dropped.had_copy_source {
895 map.nodes_with_copy_source_count -= 1
908 map.nodes_with_copy_source_count = map
909 .nodes_with_copy_source_count
910 .checked_sub(1)
911 .expect("nodes_with_copy_source_count should be >= 0");
896 912 }
897 913 } else {
898 914 debug_assert!(!was_tracked);
899 915 }
900 916 Ok(())
901 917 })
902 918 }
903 919
904 920 pub fn has_tracked_dir(
905 921 &mut self,
906 922 directory: &HgPath,
907 923 ) -> Result<bool, DirstateError> {
908 924 self.with_dmap_mut(|map| {
909 925 if let Some(node) = map.get_node(directory)? {
910 926 // A node without a `DirstateEntry` was created to hold child
911 927 // nodes, and is therefore a directory.
912 928 let state = node.state()?;
913 929 Ok(state.is_none() && node.tracked_descendants_count() > 0)
914 930 } else {
915 931 Ok(false)
916 932 }
917 933 })
918 934 }
919 935
920 936 pub fn has_dir(
921 937 &mut self,
922 938 directory: &HgPath,
923 939 ) -> Result<bool, DirstateError> {
924 940 self.with_dmap_mut(|map| {
925 941 if let Some(node) = map.get_node(directory)? {
926 942 // A node without a `DirstateEntry` was created to hold child
927 943 // nodes, and is therefore a directory.
928 944 let state = node.state()?;
929 945 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
930 946 } else {
931 947 Ok(false)
932 948 }
933 949 })
934 950 }
935 951
936 952 #[timed]
937 953 pub fn pack_v1(
938 954 &self,
939 955 parents: DirstateParents,
940 956 ) -> Result<Vec<u8>, DirstateError> {
941 957 let map = self.get_map();
942 958 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
943 959 // reallocations
944 960 let mut size = parents.as_bytes().len();
945 961 for node in map.iter_nodes() {
946 962 let node = node?;
947 963 if node.entry()?.is_some() {
948 964 size += packed_entry_size(
949 965 node.full_path(map.on_disk)?,
950 966 node.copy_source(map.on_disk)?,
951 967 );
952 968 }
953 969 }
954 970
955 971 let mut packed = Vec::with_capacity(size);
956 972 packed.extend(parents.as_bytes());
957 973
958 974 for node in map.iter_nodes() {
959 975 let node = node?;
960 976 if let Some(entry) = node.entry()? {
961 977 pack_entry(
962 978 node.full_path(map.on_disk)?,
963 979 &entry,
964 980 node.copy_source(map.on_disk)?,
965 981 &mut packed,
966 982 );
967 983 }
968 984 }
969 985 Ok(packed)
970 986 }
971 987
972 988 /// Returns new data and metadata together with whether that data should be
973 989 /// appended to the existing data file whose content is at
974 990 /// `map.on_disk` (true), instead of written to a new data file
975 991 /// (false).
976 992 #[timed]
977 993 pub fn pack_v2(
978 994 &self,
979 995 can_append: bool,
980 996 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> {
981 997 let map = self.get_map();
982 998 on_disk::write(map, can_append)
983 999 }
984 1000
985 1001 /// `callback` allows the caller to process and do something with the
986 1002 /// results of the status. This is needed to do so efficiently (i.e.
987 1003 /// without cloning the `DirstateStatus` object with its paths) because
988 1004 /// we need to borrow from `Self`.
989 1005 pub fn with_status<R>(
990 1006 &mut self,
991 1007 matcher: &(dyn Matcher + Sync),
992 1008 root_dir: PathBuf,
993 1009 ignore_files: Vec<PathBuf>,
994 1010 options: StatusOptions,
995 1011 callback: impl for<'r> FnOnce(
996 1012 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
997 1013 ) -> R,
998 1014 ) -> R {
999 1015 self.with_dmap_mut(|map| {
1000 1016 callback(super::status::status(
1001 1017 map,
1002 1018 matcher,
1003 1019 root_dir,
1004 1020 ignore_files,
1005 1021 options,
1006 1022 ))
1007 1023 })
1008 1024 }
1009 1025
1010 1026 pub fn copy_map_len(&self) -> usize {
1011 1027 let map = self.get_map();
1012 1028 map.nodes_with_copy_source_count as usize
1013 1029 }
1014 1030
1015 1031 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1016 1032 let map = self.get_map();
1017 1033 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1018 1034 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1019 1035 Some((node.full_path(map.on_disk)?, source))
1020 1036 } else {
1021 1037 None
1022 1038 })
1023 1039 }))
1024 1040 }
1025 1041
1026 1042 pub fn copy_map_contains_key(
1027 1043 &self,
1028 1044 key: &HgPath,
1029 1045 ) -> Result<bool, DirstateV2ParseError> {
1030 1046 let map = self.get_map();
1031 1047 Ok(if let Some(node) = map.get_node(key)? {
1032 1048 node.has_copy_source()
1033 1049 } else {
1034 1050 false
1035 1051 })
1036 1052 }
1037 1053
1038 1054 pub fn copy_map_get(
1039 1055 &self,
1040 1056 key: &HgPath,
1041 1057 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1042 1058 let map = self.get_map();
1043 1059 if let Some(node) = map.get_node(key)? {
1044 1060 if let Some(source) = node.copy_source(map.on_disk)? {
1045 1061 return Ok(Some(source));
1046 1062 }
1047 1063 }
1048 1064 Ok(None)
1049 1065 }
1050 1066
1051 1067 pub fn copy_map_remove(
1052 1068 &mut self,
1053 1069 key: &HgPath,
1054 1070 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1055 1071 self.with_dmap_mut(|map| {
1056 1072 let count = &mut map.nodes_with_copy_source_count;
1057 1073 let unreachable_bytes = &mut map.unreachable_bytes;
1058 1074 Ok(DirstateMap::get_node_mut(
1059 1075 map.on_disk,
1060 1076 unreachable_bytes,
1061 1077 &mut map.root,
1062 1078 key,
1063 1079 )?
1064 1080 .and_then(|node| {
1065 1081 if let Some(source) = &node.copy_source {
1066 1082 *count -= 1;
1067 1083 DirstateMap::count_dropped_path(unreachable_bytes, source);
1068 1084 }
1069 1085 node.copy_source.take().map(Cow::into_owned)
1070 1086 }))
1071 1087 })
1072 1088 }
1073 1089
1074 1090 pub fn copy_map_insert(
1075 1091 &mut self,
1076 1092 key: HgPathBuf,
1077 1093 value: HgPathBuf,
1078 1094 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1079 1095 self.with_dmap_mut(|map| {
1080 1096 let node = DirstateMap::get_or_insert_node(
1081 1097 map.on_disk,
1082 1098 &mut map.unreachable_bytes,
1083 1099 &mut map.root,
1084 1100 &key,
1085 1101 WithBasename::to_cow_owned,
1086 1102 |_ancestor| {},
1087 1103 )?;
1088 1104 if node.copy_source.is_none() {
1089 1105 map.nodes_with_copy_source_count += 1
1090 1106 }
1091 1107 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
1092 1108 })
1093 1109 }
1094 1110
1095 1111 pub fn len(&self) -> usize {
1096 1112 let map = self.get_map();
1097 1113 map.nodes_with_entry_count as usize
1098 1114 }
1099 1115
1100 1116 pub fn contains_key(
1101 1117 &self,
1102 1118 key: &HgPath,
1103 1119 ) -> Result<bool, DirstateV2ParseError> {
1104 1120 Ok(self.get(key)?.is_some())
1105 1121 }
1106 1122
1107 1123 pub fn get(
1108 1124 &self,
1109 1125 key: &HgPath,
1110 1126 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1111 1127 let map = self.get_map();
1112 1128 Ok(if let Some(node) = map.get_node(key)? {
1113 1129 node.entry()?
1114 1130 } else {
1115 1131 None
1116 1132 })
1117 1133 }
1118 1134
1119 1135 pub fn iter(&self) -> StateMapIter<'_> {
1120 1136 let map = self.get_map();
1121 1137 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1122 1138 Ok(if let Some(entry) = node.entry()? {
1123 1139 Some((node.full_path(map.on_disk)?, entry))
1124 1140 } else {
1125 1141 None
1126 1142 })
1127 1143 }))
1128 1144 }
1129 1145
1130 1146 pub fn iter_tracked_dirs(
1131 1147 &mut self,
1132 1148 ) -> Result<
1133 1149 Box<
1134 1150 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1135 1151 + Send
1136 1152 + '_,
1137 1153 >,
1138 1154 DirstateError,
1139 1155 > {
1140 1156 let map = self.get_map();
1141 1157 let on_disk = map.on_disk;
1142 1158 Ok(Box::new(filter_map_results(
1143 1159 map.iter_nodes(),
1144 1160 move |node| {
1145 1161 Ok(if node.tracked_descendants_count() > 0 {
1146 1162 Some(node.full_path(on_disk)?)
1147 1163 } else {
1148 1164 None
1149 1165 })
1150 1166 },
1151 1167 )))
1152 1168 }
1153 1169
1154 1170 pub fn debug_iter(
1155 1171 &self,
1156 1172 all: bool,
1157 1173 ) -> Box<
1158 1174 dyn Iterator<
1159 1175 Item = Result<
1160 1176 (&HgPath, (u8, i32, i32, i32)),
1161 1177 DirstateV2ParseError,
1162 1178 >,
1163 1179 > + Send
1164 1180 + '_,
1165 1181 > {
1166 1182 let map = self.get_map();
1167 1183 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1168 1184 let debug_tuple = if let Some(entry) = node.entry()? {
1169 1185 entry.debug_tuple()
1170 1186 } else if !all {
1171 1187 return Ok(None);
1172 1188 } else if let Some(mtime) = node.cached_directory_mtime()? {
1173 1189 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1174 1190 } else {
1175 1191 (b' ', 0, -1, -1)
1176 1192 };
1177 1193 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1178 1194 }))
1179 1195 }
1180 1196 }
General Comments 0
You need to be logged in to leave comments. Login now