##// END OF EJS Templates
rust-dirstatemap: add `set_cached_mtime` helper method...
Raphaël Gomès -
r50019:464747fa default
parent child Browse files
Show More
@@ -1,1869 +1,1893 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::path::PathBuf;
5 5
6 6 use super::on_disk;
7 7 use super::on_disk::DirstateV2ParseError;
8 8 use super::owning::OwningDirstateMap;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::CopyMapIter;
14 14 use crate::dirstate::DirstateV2Data;
15 15 use crate::dirstate::ParentFileData;
16 16 use crate::dirstate::StateMapIter;
17 17 use crate::dirstate::TruncatedTimestamp;
18 18 use crate::matchers::Matcher;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::DirstateEntry;
21 21 use crate::DirstateError;
22 22 use crate::DirstateMapError;
23 23 use crate::DirstateParents;
24 24 use crate::DirstateStatus;
25 25 use crate::EntryState;
26 26 use crate::FastHashbrownMap as FastHashMap;
27 27 use crate::PatternFileWarning;
28 28 use crate::StatusError;
29 29 use crate::StatusOptions;
30 30
/// Threshold used when deciding how to write the data file: as long as the
/// fraction of existing data that is unreachable (not used anymore) stays
/// strictly below this value, new data is appended to the existing file.
const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
34 34
35 35 #[derive(Debug)]
36 36 pub struct DirstateMap<'on_disk> {
37 37 /// Contents of the `.hg/dirstate` file
38 38 pub(super) on_disk: &'on_disk [u8],
39 39
40 40 pub(super) root: ChildNodes<'on_disk>,
41 41
42 42 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
43 43 pub(super) nodes_with_entry_count: u32,
44 44
45 45 /// Number of nodes anywhere in the tree that have
46 46 /// `.copy_source.is_some()`.
47 47 pub(super) nodes_with_copy_source_count: u32,
48 48
49 49 /// See on_disk::Header
50 50 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
51 51
52 52 /// How many bytes of `on_disk` are not used anymore
53 53 pub(super) unreachable_bytes: u32,
54 54 }
55 55
56 56 /// Using a plain `HgPathBuf` of the full path from the repository root as a
57 57 /// map key would also work: all paths in a given map have the same parent
58 58 /// path, so comparing full paths gives the same result as comparing base
59 59 /// names. However `HashMap` would waste time always re-hashing the same
60 60 /// string prefix.
61 61 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
62 62
63 63 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
64 64 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
65 65 #[derive(Debug)]
66 66 pub(super) enum BorrowedPath<'tree, 'on_disk> {
67 67 InMemory(&'tree HgPathBuf),
68 68 OnDisk(&'on_disk HgPath),
69 69 }
70 70
71 71 #[derive(Debug)]
72 72 pub(super) enum ChildNodes<'on_disk> {
73 73 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
74 74 OnDisk(&'on_disk [on_disk::Node]),
75 75 }
76 76
77 77 #[derive(Debug)]
78 78 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
79 79 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
80 80 OnDisk(&'on_disk [on_disk::Node]),
81 81 }
82 82
83 83 #[derive(Debug)]
84 84 pub(super) enum NodeRef<'tree, 'on_disk> {
85 85 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
86 86 OnDisk(&'on_disk on_disk::Node),
87 87 }
88 88
89 89 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
90 90 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
91 91 match *self {
92 92 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
93 93 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
94 94 }
95 95 }
96 96 }
97 97
98 98 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
99 99 type Target = HgPath;
100 100
101 101 fn deref(&self) -> &HgPath {
102 102 match *self {
103 103 BorrowedPath::InMemory(in_memory) => in_memory,
104 104 BorrowedPath::OnDisk(on_disk) => on_disk,
105 105 }
106 106 }
107 107 }
108 108
109 109 impl Default for ChildNodes<'_> {
110 110 fn default() -> Self {
111 111 ChildNodes::InMemory(Default::default())
112 112 }
113 113 }
114 114
115 115 impl<'on_disk> ChildNodes<'on_disk> {
116 116 pub(super) fn as_ref<'tree>(
117 117 &'tree self,
118 118 ) -> ChildNodesRef<'tree, 'on_disk> {
119 119 match self {
120 120 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
121 121 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
122 122 }
123 123 }
124 124
125 125 pub(super) fn is_empty(&self) -> bool {
126 126 match self {
127 127 ChildNodes::InMemory(nodes) => nodes.is_empty(),
128 128 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
129 129 }
130 130 }
131 131
132 132 fn make_mut(
133 133 &mut self,
134 134 on_disk: &'on_disk [u8],
135 135 unreachable_bytes: &mut u32,
136 136 ) -> Result<
137 137 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
138 138 DirstateV2ParseError,
139 139 > {
140 140 match self {
141 141 ChildNodes::InMemory(nodes) => Ok(nodes),
142 142 ChildNodes::OnDisk(nodes) => {
143 143 *unreachable_bytes +=
144 144 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
145 145 let nodes = nodes
146 146 .iter()
147 147 .map(|node| {
148 148 Ok((
149 149 node.path(on_disk)?,
150 150 node.to_in_memory_node(on_disk)?,
151 151 ))
152 152 })
153 153 .collect::<Result<_, _>>()?;
154 154 *self = ChildNodes::InMemory(nodes);
155 155 match self {
156 156 ChildNodes::InMemory(nodes) => Ok(nodes),
157 157 ChildNodes::OnDisk(_) => unreachable!(),
158 158 }
159 159 }
160 160 }
161 161 }
162 162 }
163 163
164 164 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
165 165 pub(super) fn get(
166 166 &self,
167 167 base_name: &HgPath,
168 168 on_disk: &'on_disk [u8],
169 169 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
170 170 match self {
171 171 ChildNodesRef::InMemory(nodes) => Ok(nodes
172 172 .get_key_value(base_name)
173 173 .map(|(k, v)| NodeRef::InMemory(k, v))),
174 174 ChildNodesRef::OnDisk(nodes) => {
175 175 let mut parse_result = Ok(());
176 176 let search_result = nodes.binary_search_by(|node| {
177 177 match node.base_name(on_disk) {
178 178 Ok(node_base_name) => node_base_name.cmp(base_name),
179 179 Err(e) => {
180 180 parse_result = Err(e);
181 181 // Dummy comparison result, `search_result` won’t
182 182 // be used since `parse_result` is an error
183 183 std::cmp::Ordering::Equal
184 184 }
185 185 }
186 186 });
187 187 parse_result.map(|()| {
188 188 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
189 189 })
190 190 }
191 191 }
192 192 }
193 193
194 194 /// Iterate in undefined order
195 195 pub(super) fn iter(
196 196 &self,
197 197 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
198 198 match self {
199 199 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
200 200 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
201 201 ),
202 202 ChildNodesRef::OnDisk(nodes) => {
203 203 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
204 204 }
205 205 }
206 206 }
207 207
208 208 /// Iterate in parallel in undefined order
209 209 pub(super) fn par_iter(
210 210 &self,
211 211 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
212 212 {
213 213 use rayon::prelude::*;
214 214 match self {
215 215 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
216 216 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
217 217 ),
218 218 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
219 219 nodes.par_iter().map(NodeRef::OnDisk),
220 220 ),
221 221 }
222 222 }
223 223
224 224 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
225 225 match self {
226 226 ChildNodesRef::InMemory(nodes) => {
227 227 let mut vec: Vec<_> = nodes
228 228 .iter()
229 229 .map(|(k, v)| NodeRef::InMemory(k, v))
230 230 .collect();
231 231 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
232 232 match node {
233 233 NodeRef::InMemory(path, _node) => path.base_name(),
234 234 NodeRef::OnDisk(_) => unreachable!(),
235 235 }
236 236 }
237 237 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
238 238 // value: https://github.com/rust-lang/rust/issues/34162
239 239 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
240 240 vec
241 241 }
242 242 ChildNodesRef::OnDisk(nodes) => {
243 243 // Nodes on disk are already sorted
244 244 nodes.iter().map(NodeRef::OnDisk).collect()
245 245 }
246 246 }
247 247 }
248 248 }
249 249
250 250 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
251 251 pub(super) fn full_path(
252 252 &self,
253 253 on_disk: &'on_disk [u8],
254 254 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
255 255 match self {
256 256 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
257 257 NodeRef::OnDisk(node) => node.full_path(on_disk),
258 258 }
259 259 }
260 260
261 261 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
262 262 /// HgPath>` detached from `'tree`
263 263 pub(super) fn full_path_borrowed(
264 264 &self,
265 265 on_disk: &'on_disk [u8],
266 266 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
267 267 match self {
268 268 NodeRef::InMemory(path, _node) => match path.full_path() {
269 269 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
270 270 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
271 271 },
272 272 NodeRef::OnDisk(node) => {
273 273 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
274 274 }
275 275 }
276 276 }
277 277
278 278 pub(super) fn base_name(
279 279 &self,
280 280 on_disk: &'on_disk [u8],
281 281 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
282 282 match self {
283 283 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
284 284 NodeRef::OnDisk(node) => node.base_name(on_disk),
285 285 }
286 286 }
287 287
288 288 pub(super) fn children(
289 289 &self,
290 290 on_disk: &'on_disk [u8],
291 291 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
292 292 match self {
293 293 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
294 294 NodeRef::OnDisk(node) => {
295 295 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
296 296 }
297 297 }
298 298 }
299 299
300 300 pub(super) fn has_copy_source(&self) -> bool {
301 301 match self {
302 302 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
303 303 NodeRef::OnDisk(node) => node.has_copy_source(),
304 304 }
305 305 }
306 306
307 307 pub(super) fn copy_source(
308 308 &self,
309 309 on_disk: &'on_disk [u8],
310 310 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
311 311 match self {
312 312 NodeRef::InMemory(_path, node) => {
313 313 Ok(node.copy_source.as_ref().map(|s| &**s))
314 314 }
315 315 NodeRef::OnDisk(node) => node.copy_source(on_disk),
316 316 }
317 317 }
318 318 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
319 319 /// HgPath>` detached from `'tree`
320 320 pub(super) fn copy_source_borrowed(
321 321 &self,
322 322 on_disk: &'on_disk [u8],
323 323 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
324 324 {
325 325 Ok(match self {
326 326 NodeRef::InMemory(_path, node) => {
327 327 node.copy_source.as_ref().map(|source| match source {
328 328 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
329 329 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
330 330 })
331 331 }
332 332 NodeRef::OnDisk(node) => node
333 333 .copy_source(on_disk)?
334 334 .map(|source| BorrowedPath::OnDisk(source)),
335 335 })
336 336 }
337 337
338 338 pub(super) fn entry(
339 339 &self,
340 340 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
341 341 match self {
342 342 NodeRef::InMemory(_path, node) => {
343 343 Ok(node.data.as_entry().copied())
344 344 }
345 345 NodeRef::OnDisk(node) => node.entry(),
346 346 }
347 347 }
348 348
349 349 pub(super) fn state(
350 350 &self,
351 351 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
352 352 Ok(self.entry()?.and_then(|e| {
353 353 if e.any_tracked() {
354 354 Some(e.state())
355 355 } else {
356 356 None
357 357 }
358 358 }))
359 359 }
360 360
361 361 pub(super) fn cached_directory_mtime(
362 362 &self,
363 363 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
364 364 match self {
365 365 NodeRef::InMemory(_path, node) => Ok(match node.data {
366 366 NodeData::CachedDirectory { mtime } => Some(mtime),
367 367 _ => None,
368 368 }),
369 369 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
370 370 }
371 371 }
372 372
373 373 pub(super) fn descendants_with_entry_count(&self) -> u32 {
374 374 match self {
375 375 NodeRef::InMemory(_path, node) => {
376 376 node.descendants_with_entry_count
377 377 }
378 378 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
379 379 }
380 380 }
381 381
382 382 pub(super) fn tracked_descendants_count(&self) -> u32 {
383 383 match self {
384 384 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
385 385 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
386 386 }
387 387 }
388 388 }
389 389
390 390 /// Represents a file or a directory
391 391 #[derive(Default, Debug)]
392 392 pub(super) struct Node<'on_disk> {
393 393 pub(super) data: NodeData,
394 394
395 395 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
396 396
397 397 pub(super) children: ChildNodes<'on_disk>,
398 398
399 399 /// How many (non-inclusive) descendants of this node have an entry.
400 400 pub(super) descendants_with_entry_count: u32,
401 401
402 402 /// How many (non-inclusive) descendants of this node have an entry whose
403 403 /// state is "tracked".
404 404 pub(super) tracked_descendants_count: u32,
405 405 }
406 406
407 407 #[derive(Debug)]
408 408 pub(super) enum NodeData {
409 409 Entry(DirstateEntry),
410 410 CachedDirectory { mtime: TruncatedTimestamp },
411 411 None,
412 412 }
413 413
414 414 impl Default for NodeData {
415 415 fn default() -> Self {
416 416 NodeData::None
417 417 }
418 418 }
419 419
420 420 impl NodeData {
421 421 fn has_entry(&self) -> bool {
422 422 match self {
423 423 NodeData::Entry(_) => true,
424 424 _ => false,
425 425 }
426 426 }
427 427
428 428 fn as_entry(&self) -> Option<&DirstateEntry> {
429 429 match self {
430 430 NodeData::Entry(entry) => Some(entry),
431 431 _ => None,
432 432 }
433 433 }
434 434
435 435 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
436 436 match self {
437 437 NodeData::Entry(entry) => Some(entry),
438 438 _ => None,
439 439 }
440 440 }
441 441 }
442 442
443 443 impl<'on_disk> DirstateMap<'on_disk> {
444 444 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
445 445 Self {
446 446 on_disk,
447 447 root: ChildNodes::default(),
448 448 nodes_with_entry_count: 0,
449 449 nodes_with_copy_source_count: 0,
450 450 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
451 451 unreachable_bytes: 0,
452 452 }
453 453 }
454 454
455 455 #[timed]
456 456 pub fn new_v2(
457 457 on_disk: &'on_disk [u8],
458 458 data_size: usize,
459 459 metadata: &[u8],
460 460 ) -> Result<Self, DirstateError> {
461 461 if let Some(data) = on_disk.get(..data_size) {
462 462 Ok(on_disk::read(data, metadata)?)
463 463 } else {
464 464 Err(DirstateV2ParseError.into())
465 465 }
466 466 }
467 467
468 468 #[timed]
469 469 pub fn new_v1(
470 470 on_disk: &'on_disk [u8],
471 471 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
472 472 let mut map = Self::empty(on_disk);
473 473 if map.on_disk.is_empty() {
474 474 return Ok((map, None));
475 475 }
476 476
477 477 let parents = parse_dirstate_entries(
478 478 map.on_disk,
479 479 |path, entry, copy_source| {
480 480 let tracked = entry.state().is_tracked();
481 481 let node = Self::get_or_insert_node(
482 482 map.on_disk,
483 483 &mut map.unreachable_bytes,
484 484 &mut map.root,
485 485 path,
486 486 WithBasename::to_cow_borrowed,
487 487 |ancestor| {
488 488 if tracked {
489 489 ancestor.tracked_descendants_count += 1
490 490 }
491 491 ancestor.descendants_with_entry_count += 1
492 492 },
493 493 )?;
494 494 assert!(
495 495 !node.data.has_entry(),
496 496 "duplicate dirstate entry in read"
497 497 );
498 498 assert!(
499 499 node.copy_source.is_none(),
500 500 "duplicate dirstate entry in read"
501 501 );
502 502 node.data = NodeData::Entry(*entry);
503 503 node.copy_source = copy_source.map(Cow::Borrowed);
504 504 map.nodes_with_entry_count += 1;
505 505 if copy_source.is_some() {
506 506 map.nodes_with_copy_source_count += 1
507 507 }
508 508 Ok(())
509 509 },
510 510 )?;
511 511 let parents = Some(parents.clone());
512 512
513 513 Ok((map, parents))
514 514 }
515 515
516 516 /// Assuming dirstate-v2 format, returns whether the next write should
517 517 /// append to the existing data file that contains `self.on_disk` (true),
518 518 /// or create a new data file from scratch (false).
519 519 pub(super) fn write_should_append(&self) -> bool {
520 520 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
521 521 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
522 522 }
523 523
524 524 fn get_node<'tree>(
525 525 &'tree self,
526 526 path: &HgPath,
527 527 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
528 528 let mut children = self.root.as_ref();
529 529 let mut components = path.components();
530 530 let mut component =
531 531 components.next().expect("expected at least one components");
532 532 loop {
533 533 if let Some(child) = children.get(component, self.on_disk)? {
534 534 if let Some(next_component) = components.next() {
535 535 component = next_component;
536 536 children = child.children(self.on_disk)?;
537 537 } else {
538 538 return Ok(Some(child));
539 539 }
540 540 } else {
541 541 return Ok(None);
542 542 }
543 543 }
544 544 }
545 545
546 546 /// Returns a mutable reference to the node at `path` if it exists
547 547 ///
548 548 /// This takes `root` instead of `&mut self` so that callers can mutate
549 549 /// other fields while the returned borrow is still valid
550 550 fn get_node_mut<'tree>(
551 551 on_disk: &'on_disk [u8],
552 552 unreachable_bytes: &mut u32,
553 553 root: &'tree mut ChildNodes<'on_disk>,
554 554 path: &HgPath,
555 555 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
556 556 let mut children = root;
557 557 let mut components = path.components();
558 558 let mut component =
559 559 components.next().expect("expected at least one components");
560 560 loop {
561 561 if let Some(child) = children
562 562 .make_mut(on_disk, unreachable_bytes)?
563 563 .get_mut(component)
564 564 {
565 565 if let Some(next_component) = components.next() {
566 566 component = next_component;
567 567 children = &mut child.children;
568 568 } else {
569 569 return Ok(Some(child));
570 570 }
571 571 } else {
572 572 return Ok(None);
573 573 }
574 574 }
575 575 }
576 576
577 577 pub(super) fn get_or_insert<'tree, 'path>(
578 578 &'tree mut self,
579 579 path: &HgPath,
580 580 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
581 581 Self::get_or_insert_node(
582 582 self.on_disk,
583 583 &mut self.unreachable_bytes,
584 584 &mut self.root,
585 585 path,
586 586 WithBasename::to_cow_owned,
587 587 |_| {},
588 588 )
589 589 }
590 590
591 591 fn get_or_insert_node<'tree, 'path>(
592 592 on_disk: &'on_disk [u8],
593 593 unreachable_bytes: &mut u32,
594 594 root: &'tree mut ChildNodes<'on_disk>,
595 595 path: &'path HgPath,
596 596 to_cow: impl Fn(
597 597 WithBasename<&'path HgPath>,
598 598 ) -> WithBasename<Cow<'on_disk, HgPath>>,
599 599 mut each_ancestor: impl FnMut(&mut Node),
600 600 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
601 601 let mut child_nodes = root;
602 602 let mut inclusive_ancestor_paths =
603 603 WithBasename::inclusive_ancestors_of(path);
604 604 let mut ancestor_path = inclusive_ancestor_paths
605 605 .next()
606 606 .expect("expected at least one inclusive ancestor");
607 607 loop {
608 608 let (_, child_node) = child_nodes
609 609 .make_mut(on_disk, unreachable_bytes)?
610 610 .raw_entry_mut()
611 611 .from_key(ancestor_path.base_name())
612 612 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
613 613 if let Some(next) = inclusive_ancestor_paths.next() {
614 614 each_ancestor(child_node);
615 615 ancestor_path = next;
616 616 child_nodes = &mut child_node.children;
617 617 } else {
618 618 return Ok(child_node);
619 619 }
620 620 }
621 621 }
622 622
623 623 fn reset_state(
624 624 &mut self,
625 625 filename: &HgPath,
626 626 old_entry_opt: Option<DirstateEntry>,
627 627 wc_tracked: bool,
628 628 p1_tracked: bool,
629 629 p2_info: bool,
630 630 has_meaningful_mtime: bool,
631 631 parent_file_data_opt: Option<ParentFileData>,
632 632 ) -> Result<(), DirstateError> {
633 633 let (had_entry, was_tracked) = match old_entry_opt {
634 634 Some(old_entry) => (true, old_entry.tracked()),
635 635 None => (false, false),
636 636 };
637 637 let node = Self::get_or_insert_node(
638 638 self.on_disk,
639 639 &mut self.unreachable_bytes,
640 640 &mut self.root,
641 641 filename,
642 642 WithBasename::to_cow_owned,
643 643 |ancestor| {
644 644 if !had_entry {
645 645 ancestor.descendants_with_entry_count += 1;
646 646 }
647 647 if was_tracked {
648 648 if !wc_tracked {
649 649 ancestor.tracked_descendants_count = ancestor
650 650 .tracked_descendants_count
651 651 .checked_sub(1)
652 652 .expect("tracked count to be >= 0");
653 653 }
654 654 } else {
655 655 if wc_tracked {
656 656 ancestor.tracked_descendants_count += 1;
657 657 }
658 658 }
659 659 },
660 660 )?;
661 661
662 662 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
663 663 DirstateV2Data {
664 664 wc_tracked,
665 665 p1_tracked,
666 666 p2_info,
667 667 mode_size: parent_file_data.mode_size,
668 668 mtime: if has_meaningful_mtime {
669 669 parent_file_data.mtime
670 670 } else {
671 671 None
672 672 },
673 673 ..Default::default()
674 674 }
675 675 } else {
676 676 DirstateV2Data {
677 677 wc_tracked,
678 678 p1_tracked,
679 679 p2_info,
680 680 ..Default::default()
681 681 }
682 682 };
683 683 if !had_entry {
684 684 self.nodes_with_entry_count += 1;
685 685 }
686 686 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
687 687 Ok(())
688 688 }
689 689
690 690 fn set_tracked(
691 691 &mut self,
692 692 filename: &HgPath,
693 693 old_entry_opt: Option<DirstateEntry>,
694 694 ) -> Result<bool, DirstateV2ParseError> {
695 695 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
696 696 let had_entry = old_entry_opt.is_some();
697 697 let tracked_count_increment = if was_tracked { 0 } else { 1 };
698 698 let mut new = false;
699 699
700 700 let node = Self::get_or_insert_node(
701 701 self.on_disk,
702 702 &mut self.unreachable_bytes,
703 703 &mut self.root,
704 704 filename,
705 705 WithBasename::to_cow_owned,
706 706 |ancestor| {
707 707 if !had_entry {
708 708 ancestor.descendants_with_entry_count += 1;
709 709 }
710 710
711 711 ancestor.tracked_descendants_count += tracked_count_increment;
712 712 },
713 713 )?;
714 714 let new_entry = if let Some(old_entry) = old_entry_opt {
715 715 let mut e = old_entry.clone();
716 716 if e.tracked() {
717 717 // XXX
718 718 // This is probably overkill for more case, but we need this to
719 719 // fully replace the `normallookup` call with `set_tracked`
720 720 // one. Consider smoothing this in the future.
721 721 e.set_possibly_dirty();
722 722 } else {
723 723 new = true;
724 724 e.set_tracked();
725 725 }
726 726 e
727 727 } else {
728 728 self.nodes_with_entry_count += 1;
729 729 new = true;
730 730 DirstateEntry::new_tracked()
731 731 };
732 732 node.data = NodeData::Entry(new_entry);
733 733 Ok(new)
734 734 }
735 735
736 736 /// It is the responsibility of the caller to know that there was an entry
737 737 /// there before. Does not handle the removal of copy source
738 738 fn set_untracked(
739 739 &mut self,
740 740 filename: &HgPath,
741 741 old_entry: DirstateEntry,
742 742 ) -> Result<(), DirstateV2ParseError> {
743 743 let node = Self::get_or_insert_node(
744 744 self.on_disk,
745 745 &mut self.unreachable_bytes,
746 746 &mut self.root,
747 747 filename,
748 748 WithBasename::to_cow_owned,
749 749 |ancestor| {
750 750 ancestor.tracked_descendants_count = ancestor
751 751 .tracked_descendants_count
752 752 .checked_sub(1)
753 753 .expect("tracked_descendants_count should be >= 0");
754 754 },
755 755 )?;
756 756 let mut new_entry = old_entry.clone();
757 757 new_entry.set_untracked();
758 758 node.data = NodeData::Entry(new_entry);
759 759 Ok(())
760 760 }
761 761
762 762 fn set_clean(
763 763 &mut self,
764 764 filename: &HgPath,
765 765 old_entry: DirstateEntry,
766 766 mode: u32,
767 767 size: u32,
768 768 mtime: TruncatedTimestamp,
769 769 ) -> Result<(), DirstateError> {
770 770 let node = Self::get_or_insert_node(
771 771 self.on_disk,
772 772 &mut self.unreachable_bytes,
773 773 &mut self.root,
774 774 filename,
775 775 WithBasename::to_cow_owned,
776 776 |ancestor| {
777 777 if !old_entry.tracked() {
778 778 ancestor.tracked_descendants_count += 1;
779 779 }
780 780 },
781 781 )?;
782 782 let mut new_entry = old_entry.clone();
783 783 new_entry.set_clean(mode, size, mtime);
784 784 node.data = NodeData::Entry(new_entry);
785 785 Ok(())
786 786 }
787 787
788 788 fn set_possibly_dirty(
789 789 &mut self,
790 790 filename: &HgPath,
791 791 ) -> Result<(), DirstateError> {
792 792 let node = Self::get_or_insert_node(
793 793 self.on_disk,
794 794 &mut self.unreachable_bytes,
795 795 &mut self.root,
796 796 filename,
797 797 WithBasename::to_cow_owned,
798 798 |_ancestor| {},
799 799 )?;
800 800 let entry = node.data.as_entry_mut().expect("entry should exist");
801 801 entry.set_possibly_dirty();
802 802 node.data = NodeData::Entry(*entry);
803 803 Ok(())
804 804 }
805 805
806 806 /// Clears the cached mtime for the (potential) folder at `path`.
807 807 pub(super) fn clear_cached_mtime(
808 808 &mut self,
809 809 path: &HgPath,
810 810 ) -> Result<(), DirstateV2ParseError> {
811 811 let node = match DirstateMap::get_node_mut(
812 812 self.on_disk,
813 813 &mut self.unreachable_bytes,
814 814 &mut self.root,
815 815 path,
816 816 )? {
817 817 Some(node) => node,
818 818 None => return Ok(()),
819 819 };
820 820 if let NodeData::CachedDirectory { .. } = &node.data {
821 821 node.data = NodeData::None
822 822 }
823 823 Ok(())
824 824 }
825 825
826 /// Sets the cached mtime for the (potential) folder at `path`.
827 pub(super) fn set_cached_mtime(
828 &mut self,
829 path: &HgPath,
830 mtime: TruncatedTimestamp,
831 ) -> Result<(), DirstateV2ParseError> {
832 let node = match DirstateMap::get_node_mut(
833 self.on_disk,
834 &mut self.unreachable_bytes,
835 &mut self.root,
836 path,
837 )? {
838 Some(node) => node,
839 None => return Ok(()),
840 };
841 match &node.data {
842 NodeData::Entry(_) => {} // Don’t overwrite an entry
843 NodeData::CachedDirectory { .. } | NodeData::None => {
844 node.data = NodeData::CachedDirectory { mtime }
845 }
846 }
847 Ok(())
848 }
849
826 850 fn iter_nodes<'tree>(
827 851 &'tree self,
828 852 ) -> impl Iterator<
829 853 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
830 854 > + 'tree {
831 855 // Depth first tree traversal.
832 856 //
833 857 // If we could afford internal iteration and recursion,
834 858 // this would look like:
835 859 //
836 860 // ```
837 861 // fn traverse_children(
838 862 // children: &ChildNodes,
839 863 // each: &mut impl FnMut(&Node),
840 864 // ) {
841 865 // for child in children.values() {
842 866 // traverse_children(&child.children, each);
843 867 // each(child);
844 868 // }
845 869 // }
846 870 // ```
847 871 //
848 872 // However we want an external iterator and therefore can’t use the
849 873 // call stack. Use an explicit stack instead:
850 874 let mut stack = Vec::new();
851 875 let mut iter = self.root.as_ref().iter();
852 876 std::iter::from_fn(move || {
853 877 while let Some(child_node) = iter.next() {
854 878 let children = match child_node.children(self.on_disk) {
855 879 Ok(children) => children,
856 880 Err(error) => return Some(Err(error)),
857 881 };
858 882 // Pseudo-recursion
859 883 let new_iter = children.iter();
860 884 let old_iter = std::mem::replace(&mut iter, new_iter);
861 885 stack.push((child_node, old_iter));
862 886 }
863 887 // Found the end of a `children.iter()` iterator.
864 888 if let Some((child_node, next_iter)) = stack.pop() {
865 889 // "Return" from pseudo-recursion by restoring state from the
866 890 // explicit stack
867 891 iter = next_iter;
868 892
869 893 Some(Ok(child_node))
870 894 } else {
871 895 // Reached the bottom of the stack, we’re done
872 896 None
873 897 }
874 898 })
875 899 }
876 900
877 901 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
878 902 if let Cow::Borrowed(path) = path {
879 903 *unreachable_bytes += path.len() as u32
880 904 }
881 905 }
882 906 }
883 907
/// Fallible counterpart of `Iterator::filter_map`, for iterators of
/// `Result`s.
///
/// `f` only sees incoming `Ok` values; incoming errors are forwarded
/// untouched. So that `f` can use the `?` operator, it returns a `Result` of
/// `Option` rather than an `Option` of `Result`: `Ok(None)` drops the item,
/// `Ok(Some(_))` and `Err(_)` are yielded.
fn filter_map_results<'a, I, F, A, B, E>(
    iter: I,
    f: F,
) -> impl Iterator<Item = Result<B, E>> + 'a
where
    I: Iterator<Item = Result<A, E>> + 'a,
    F: Fn(A) -> Result<Option<B>, E> + 'a,
{
    iter.filter_map(move |item| item.and_then(&f).transpose())
}
903 927
904 928 impl OwningDirstateMap {
905 929 pub fn clear(&mut self) {
906 930 self.with_dmap_mut(|map| {
907 931 map.root = Default::default();
908 932 map.nodes_with_entry_count = 0;
909 933 map.nodes_with_copy_source_count = 0;
910 934 });
911 935 }
912 936
913 937 pub fn set_tracked(
914 938 &mut self,
915 939 filename: &HgPath,
916 940 ) -> Result<bool, DirstateV2ParseError> {
917 941 let old_entry_opt = self.get(filename)?;
918 942 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
919 943 }
920 944
921 945 pub fn set_untracked(
922 946 &mut self,
923 947 filename: &HgPath,
924 948 ) -> Result<bool, DirstateError> {
925 949 let old_entry_opt = self.get(filename)?;
926 950 match old_entry_opt {
927 951 None => Ok(false),
928 952 Some(old_entry) => {
929 953 if !old_entry.tracked() {
930 954 // `DirstateMap::set_untracked` is not a noop if
931 955 // already not tracked as it will decrement the
932 956 // tracked counters while going down.
933 957 return Ok(true);
934 958 }
935 959 if old_entry.added() {
936 960 // Untracking an "added" entry will just result in a
937 961 // worthless entry (and other parts of the code will
938 962 // complain about it), just drop it entirely.
939 963 self.drop_entry_and_copy_source(filename)?;
940 964 return Ok(true);
941 965 }
942 966 if !old_entry.p2_info() {
943 967 self.copy_map_remove(filename)?;
944 968 }
945 969
946 970 self.with_dmap_mut(|map| {
947 971 map.set_untracked(filename, old_entry)?;
948 972 Ok(true)
949 973 })
950 974 }
951 975 }
952 976 }
953 977
954 978 pub fn set_clean(
955 979 &mut self,
956 980 filename: &HgPath,
957 981 mode: u32,
958 982 size: u32,
959 983 mtime: TruncatedTimestamp,
960 984 ) -> Result<(), DirstateError> {
961 985 let old_entry = match self.get(filename)? {
962 986 None => {
963 987 return Err(
964 988 DirstateMapError::PathNotFound(filename.into()).into()
965 989 )
966 990 }
967 991 Some(e) => e,
968 992 };
969 993 self.copy_map_remove(filename)?;
970 994 self.with_dmap_mut(|map| {
971 995 map.set_clean(filename, old_entry, mode, size, mtime)
972 996 })
973 997 }
974 998
975 999 pub fn set_possibly_dirty(
976 1000 &mut self,
977 1001 filename: &HgPath,
978 1002 ) -> Result<(), DirstateError> {
979 1003 if self.get(filename)?.is_none() {
980 1004 return Err(DirstateMapError::PathNotFound(filename.into()).into());
981 1005 }
982 1006 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
983 1007 }
984 1008
985 1009 pub fn reset_state(
986 1010 &mut self,
987 1011 filename: &HgPath,
988 1012 wc_tracked: bool,
989 1013 p1_tracked: bool,
990 1014 p2_info: bool,
991 1015 has_meaningful_mtime: bool,
992 1016 parent_file_data_opt: Option<ParentFileData>,
993 1017 ) -> Result<(), DirstateError> {
994 1018 if !(p1_tracked || p2_info || wc_tracked) {
995 1019 self.drop_entry_and_copy_source(filename)?;
996 1020 return Ok(());
997 1021 }
998 1022 self.copy_map_remove(filename)?;
999 1023 let old_entry_opt = self.get(filename)?;
1000 1024 self.with_dmap_mut(|map| {
1001 1025 map.reset_state(
1002 1026 filename,
1003 1027 old_entry_opt,
1004 1028 wc_tracked,
1005 1029 p1_tracked,
1006 1030 p2_info,
1007 1031 has_meaningful_mtime,
1008 1032 parent_file_data_opt,
1009 1033 )
1010 1034 })
1011 1035 }
1012 1036
1013 1037 pub fn drop_entry_and_copy_source(
1014 1038 &mut self,
1015 1039 filename: &HgPath,
1016 1040 ) -> Result<(), DirstateError> {
1017 1041 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
1018 1042 struct Dropped {
1019 1043 was_tracked: bool,
1020 1044 had_entry: bool,
1021 1045 had_copy_source: bool,
1022 1046 }
1023 1047
1024 1048 /// If this returns `Ok(Some((dropped, removed)))`, then
1025 1049 ///
1026 1050 /// * `dropped` is about the leaf node that was at `filename`
1027 1051 /// * `removed` is whether this particular level of recursion just
1028 1052 /// removed a node in `nodes`.
1029 1053 fn recur<'on_disk>(
1030 1054 on_disk: &'on_disk [u8],
1031 1055 unreachable_bytes: &mut u32,
1032 1056 nodes: &mut ChildNodes<'on_disk>,
1033 1057 path: &HgPath,
1034 1058 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1035 1059 let (first_path_component, rest_of_path) =
1036 1060 path.split_first_component();
1037 1061 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1038 1062 let node = if let Some(node) = nodes.get_mut(first_path_component)
1039 1063 {
1040 1064 node
1041 1065 } else {
1042 1066 return Ok(None);
1043 1067 };
1044 1068 let dropped;
1045 1069 if let Some(rest) = rest_of_path {
1046 1070 if let Some((d, removed)) = recur(
1047 1071 on_disk,
1048 1072 unreachable_bytes,
1049 1073 &mut node.children,
1050 1074 rest,
1051 1075 )? {
1052 1076 dropped = d;
1053 1077 if dropped.had_entry {
1054 1078 node.descendants_with_entry_count = node
1055 1079 .descendants_with_entry_count
1056 1080 .checked_sub(1)
1057 1081 .expect(
1058 1082 "descendants_with_entry_count should be >= 0",
1059 1083 );
1060 1084 }
1061 1085 if dropped.was_tracked {
1062 1086 node.tracked_descendants_count = node
1063 1087 .tracked_descendants_count
1064 1088 .checked_sub(1)
1065 1089 .expect(
1066 1090 "tracked_descendants_count should be >= 0",
1067 1091 );
1068 1092 }
1069 1093
1070 1094 // Directory caches must be invalidated when removing a
1071 1095 // child node
1072 1096 if removed {
1073 1097 if let NodeData::CachedDirectory { .. } = &node.data {
1074 1098 node.data = NodeData::None
1075 1099 }
1076 1100 }
1077 1101 } else {
1078 1102 return Ok(None);
1079 1103 }
1080 1104 } else {
1081 1105 let entry = node.data.as_entry();
1082 1106 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1083 1107 let had_entry = entry.is_some();
1084 1108 if had_entry {
1085 1109 node.data = NodeData::None
1086 1110 }
1087 1111 let mut had_copy_source = false;
1088 1112 if let Some(source) = &node.copy_source {
1089 1113 DirstateMap::count_dropped_path(unreachable_bytes, source);
1090 1114 had_copy_source = true;
1091 1115 node.copy_source = None
1092 1116 }
1093 1117 dropped = Dropped {
1094 1118 was_tracked,
1095 1119 had_entry,
1096 1120 had_copy_source,
1097 1121 };
1098 1122 }
1099 1123 // After recursion, for both leaf (rest_of_path is None) nodes and
1100 1124 // parent nodes, remove a node if it just became empty.
1101 1125 let remove = !node.data.has_entry()
1102 1126 && node.copy_source.is_none()
1103 1127 && node.children.is_empty();
1104 1128 if remove {
1105 1129 let (key, _) =
1106 1130 nodes.remove_entry(first_path_component).unwrap();
1107 1131 DirstateMap::count_dropped_path(
1108 1132 unreachable_bytes,
1109 1133 key.full_path(),
1110 1134 )
1111 1135 }
1112 1136 Ok(Some((dropped, remove)))
1113 1137 }
1114 1138
1115 1139 self.with_dmap_mut(|map| {
1116 1140 if let Some((dropped, _removed)) = recur(
1117 1141 map.on_disk,
1118 1142 &mut map.unreachable_bytes,
1119 1143 &mut map.root,
1120 1144 filename,
1121 1145 )? {
1122 1146 if dropped.had_entry {
1123 1147 map.nodes_with_entry_count = map
1124 1148 .nodes_with_entry_count
1125 1149 .checked_sub(1)
1126 1150 .expect("nodes_with_entry_count should be >= 0");
1127 1151 }
1128 1152 if dropped.had_copy_source {
1129 1153 map.nodes_with_copy_source_count = map
1130 1154 .nodes_with_copy_source_count
1131 1155 .checked_sub(1)
1132 1156 .expect("nodes_with_copy_source_count should be >= 0");
1133 1157 }
1134 1158 } else {
1135 1159 debug_assert!(!was_tracked);
1136 1160 }
1137 1161 Ok(())
1138 1162 })
1139 1163 }
1140 1164
1141 1165 pub fn has_tracked_dir(
1142 1166 &mut self,
1143 1167 directory: &HgPath,
1144 1168 ) -> Result<bool, DirstateError> {
1145 1169 self.with_dmap_mut(|map| {
1146 1170 if let Some(node) = map.get_node(directory)? {
1147 1171 // A node without a `DirstateEntry` was created to hold child
1148 1172 // nodes, and is therefore a directory.
1149 1173 let state = node.state()?;
1150 1174 Ok(state.is_none() && node.tracked_descendants_count() > 0)
1151 1175 } else {
1152 1176 Ok(false)
1153 1177 }
1154 1178 })
1155 1179 }
1156 1180
1157 1181 pub fn has_dir(
1158 1182 &mut self,
1159 1183 directory: &HgPath,
1160 1184 ) -> Result<bool, DirstateError> {
1161 1185 self.with_dmap_mut(|map| {
1162 1186 if let Some(node) = map.get_node(directory)? {
1163 1187 // A node without a `DirstateEntry` was created to hold child
1164 1188 // nodes, and is therefore a directory.
1165 1189 let state = node.state()?;
1166 1190 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
1167 1191 } else {
1168 1192 Ok(false)
1169 1193 }
1170 1194 })
1171 1195 }
1172 1196
1173 1197 #[timed]
1174 1198 pub fn pack_v1(
1175 1199 &self,
1176 1200 parents: DirstateParents,
1177 1201 ) -> Result<Vec<u8>, DirstateError> {
1178 1202 let map = self.get_map();
1179 1203 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1180 1204 // reallocations
1181 1205 let mut size = parents.as_bytes().len();
1182 1206 for node in map.iter_nodes() {
1183 1207 let node = node?;
1184 1208 if node.entry()?.is_some() {
1185 1209 size += packed_entry_size(
1186 1210 node.full_path(map.on_disk)?,
1187 1211 node.copy_source(map.on_disk)?,
1188 1212 );
1189 1213 }
1190 1214 }
1191 1215
1192 1216 let mut packed = Vec::with_capacity(size);
1193 1217 packed.extend(parents.as_bytes());
1194 1218
1195 1219 for node in map.iter_nodes() {
1196 1220 let node = node?;
1197 1221 if let Some(entry) = node.entry()? {
1198 1222 pack_entry(
1199 1223 node.full_path(map.on_disk)?,
1200 1224 &entry,
1201 1225 node.copy_source(map.on_disk)?,
1202 1226 &mut packed,
1203 1227 );
1204 1228 }
1205 1229 }
1206 1230 Ok(packed)
1207 1231 }
1208 1232
1209 1233 /// Returns new data and metadata together with whether that data should be
1210 1234 /// appended to the existing data file whose content is at
1211 1235 /// `map.on_disk` (true), instead of written to a new data file
1212 1236 /// (false).
1213 1237 #[timed]
1214 1238 pub fn pack_v2(
1215 1239 &self,
1216 1240 can_append: bool,
1217 1241 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> {
1218 1242 let map = self.get_map();
1219 1243 on_disk::write(map, can_append)
1220 1244 }
1221 1245
1222 1246 /// `callback` allows the caller to process and do something with the
1223 1247 /// results of the status. This is needed to do so efficiently (i.e.
1224 1248 /// without cloning the `DirstateStatus` object with its paths) because
1225 1249 /// we need to borrow from `Self`.
1226 1250 pub fn with_status<R>(
1227 1251 &mut self,
1228 1252 matcher: &(dyn Matcher + Sync),
1229 1253 root_dir: PathBuf,
1230 1254 ignore_files: Vec<PathBuf>,
1231 1255 options: StatusOptions,
1232 1256 callback: impl for<'r> FnOnce(
1233 1257 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1234 1258 ) -> R,
1235 1259 ) -> R {
1236 1260 self.with_dmap_mut(|map| {
1237 1261 callback(super::status::status(
1238 1262 map,
1239 1263 matcher,
1240 1264 root_dir,
1241 1265 ignore_files,
1242 1266 options,
1243 1267 ))
1244 1268 })
1245 1269 }
1246 1270
1247 1271 pub fn copy_map_len(&self) -> usize {
1248 1272 let map = self.get_map();
1249 1273 map.nodes_with_copy_source_count as usize
1250 1274 }
1251 1275
1252 1276 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1253 1277 let map = self.get_map();
1254 1278 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1255 1279 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1256 1280 Some((node.full_path(map.on_disk)?, source))
1257 1281 } else {
1258 1282 None
1259 1283 })
1260 1284 }))
1261 1285 }
1262 1286
1263 1287 pub fn copy_map_contains_key(
1264 1288 &self,
1265 1289 key: &HgPath,
1266 1290 ) -> Result<bool, DirstateV2ParseError> {
1267 1291 let map = self.get_map();
1268 1292 Ok(if let Some(node) = map.get_node(key)? {
1269 1293 node.has_copy_source()
1270 1294 } else {
1271 1295 false
1272 1296 })
1273 1297 }
1274 1298
1275 1299 pub fn copy_map_get(
1276 1300 &self,
1277 1301 key: &HgPath,
1278 1302 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1279 1303 let map = self.get_map();
1280 1304 if let Some(node) = map.get_node(key)? {
1281 1305 if let Some(source) = node.copy_source(map.on_disk)? {
1282 1306 return Ok(Some(source));
1283 1307 }
1284 1308 }
1285 1309 Ok(None)
1286 1310 }
1287 1311
1288 1312 pub fn copy_map_remove(
1289 1313 &mut self,
1290 1314 key: &HgPath,
1291 1315 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1292 1316 self.with_dmap_mut(|map| {
1293 1317 let count = &mut map.nodes_with_copy_source_count;
1294 1318 let unreachable_bytes = &mut map.unreachable_bytes;
1295 1319 Ok(DirstateMap::get_node_mut(
1296 1320 map.on_disk,
1297 1321 unreachable_bytes,
1298 1322 &mut map.root,
1299 1323 key,
1300 1324 )?
1301 1325 .and_then(|node| {
1302 1326 if let Some(source) = &node.copy_source {
1303 1327 *count -= 1;
1304 1328 DirstateMap::count_dropped_path(unreachable_bytes, source);
1305 1329 }
1306 1330 node.copy_source.take().map(Cow::into_owned)
1307 1331 }))
1308 1332 })
1309 1333 }
1310 1334
1311 1335 pub fn copy_map_insert(
1312 1336 &mut self,
1313 1337 key: &HgPath,
1314 1338 value: &HgPath,
1315 1339 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1316 1340 self.with_dmap_mut(|map| {
1317 1341 let node = DirstateMap::get_or_insert_node(
1318 1342 map.on_disk,
1319 1343 &mut map.unreachable_bytes,
1320 1344 &mut map.root,
1321 1345 &key,
1322 1346 WithBasename::to_cow_owned,
1323 1347 |_ancestor| {},
1324 1348 )?;
1325 1349 if node.copy_source.is_none() {
1326 1350 map.nodes_with_copy_source_count += 1
1327 1351 }
1328 1352 Ok(node
1329 1353 .copy_source
1330 1354 .replace(value.to_owned().into())
1331 1355 .map(Cow::into_owned))
1332 1356 })
1333 1357 }
1334 1358
1335 1359 pub fn len(&self) -> usize {
1336 1360 let map = self.get_map();
1337 1361 map.nodes_with_entry_count as usize
1338 1362 }
1339 1363
1340 1364 pub fn contains_key(
1341 1365 &self,
1342 1366 key: &HgPath,
1343 1367 ) -> Result<bool, DirstateV2ParseError> {
1344 1368 Ok(self.get(key)?.is_some())
1345 1369 }
1346 1370
1347 1371 pub fn get(
1348 1372 &self,
1349 1373 key: &HgPath,
1350 1374 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1351 1375 let map = self.get_map();
1352 1376 Ok(if let Some(node) = map.get_node(key)? {
1353 1377 node.entry()?
1354 1378 } else {
1355 1379 None
1356 1380 })
1357 1381 }
1358 1382
1359 1383 pub fn iter(&self) -> StateMapIter<'_> {
1360 1384 let map = self.get_map();
1361 1385 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1362 1386 Ok(if let Some(entry) = node.entry()? {
1363 1387 Some((node.full_path(map.on_disk)?, entry))
1364 1388 } else {
1365 1389 None
1366 1390 })
1367 1391 }))
1368 1392 }
1369 1393
1370 1394 pub fn iter_tracked_dirs(
1371 1395 &mut self,
1372 1396 ) -> Result<
1373 1397 Box<
1374 1398 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1375 1399 + Send
1376 1400 + '_,
1377 1401 >,
1378 1402 DirstateError,
1379 1403 > {
1380 1404 let map = self.get_map();
1381 1405 let on_disk = map.on_disk;
1382 1406 Ok(Box::new(filter_map_results(
1383 1407 map.iter_nodes(),
1384 1408 move |node| {
1385 1409 Ok(if node.tracked_descendants_count() > 0 {
1386 1410 Some(node.full_path(on_disk)?)
1387 1411 } else {
1388 1412 None
1389 1413 })
1390 1414 },
1391 1415 )))
1392 1416 }
1393 1417
1394 1418 /// Only public because it needs to be exposed to the Python layer.
1395 1419 /// It is not the full `setparents` logic, only the parts that mutate the
1396 1420 /// entries.
1397 1421 pub fn setparents_fixup(
1398 1422 &mut self,
1399 1423 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1400 1424 // XXX
1401 1425 // All the copying and re-querying is quite inefficient, but this is
1402 1426 // still a lot better than doing it from Python.
1403 1427 //
1404 1428 // The better solution is to develop a mechanism for `iter_mut`,
1405 1429 // which will be a lot more involved: we're dealing with a lazy,
1406 1430 // append-mostly, tree-like data structure. This will do for now.
1407 1431 let mut copies = vec![];
1408 1432 let mut files_with_p2_info = vec![];
1409 1433 for res in self.iter() {
1410 1434 let (path, entry) = res?;
1411 1435 if entry.p2_info() {
1412 1436 files_with_p2_info.push(path.to_owned())
1413 1437 }
1414 1438 }
1415 1439 self.with_dmap_mut(|map| {
1416 1440 for path in files_with_p2_info.iter() {
1417 1441 let node = map.get_or_insert(path)?;
1418 1442 let entry =
1419 1443 node.data.as_entry_mut().expect("entry should exist");
1420 1444 entry.drop_merge_data();
1421 1445 if let Some(source) = node.copy_source.take().as_deref() {
1422 1446 copies.push((path.to_owned(), source.to_owned()));
1423 1447 }
1424 1448 }
1425 1449 Ok(copies)
1426 1450 })
1427 1451 }
1428 1452
1429 1453 pub fn debug_iter(
1430 1454 &self,
1431 1455 all: bool,
1432 1456 ) -> Box<
1433 1457 dyn Iterator<
1434 1458 Item = Result<
1435 1459 (&HgPath, (u8, i32, i32, i32)),
1436 1460 DirstateV2ParseError,
1437 1461 >,
1438 1462 > + Send
1439 1463 + '_,
1440 1464 > {
1441 1465 let map = self.get_map();
1442 1466 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1443 1467 let debug_tuple = if let Some(entry) = node.entry()? {
1444 1468 entry.debug_tuple()
1445 1469 } else if !all {
1446 1470 return Ok(None);
1447 1471 } else if let Some(mtime) = node.cached_directory_mtime()? {
1448 1472 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1449 1473 } else {
1450 1474 (b' ', 0, -1, -1)
1451 1475 };
1452 1476 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1453 1477 }))
1454 1478 }
1455 1479 }
1456 1480 #[cfg(test)]
1457 1481 mod tests {
1458 1482 use super::*;
1459 1483
1460 1484 /// Shortcut to return tracked descendants of a path.
1461 1485 /// Panics if the path does not exist.
1462 1486 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1463 1487 let path = dbg!(HgPath::new(path));
1464 1488 let node = map.get_map().get_node(path);
1465 1489 node.unwrap().unwrap().tracked_descendants_count()
1466 1490 }
1467 1491
1468 1492 /// Shortcut to return descendants with an entry.
1469 1493 /// Panics if the path does not exist.
1470 1494 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1471 1495 let path = dbg!(HgPath::new(path));
1472 1496 let node = map.get_map().get_node(path);
1473 1497 node.unwrap().unwrap().descendants_with_entry_count()
1474 1498 }
1475 1499
1476 1500 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1477 1501 let path = dbg!(HgPath::new(path));
1478 1502 let node = map.get_map().get_node(path);
1479 1503 assert!(node.unwrap().is_none());
1480 1504 }
1481 1505
/// Shortcut for path creation in tests: wraps the byte string `b` in an
/// `HgPath` through `HgPath::new`.
fn p(b: &[u8]) -> &HgPath {
    HgPath::new(b)
}
1486 1510
/// Simplest scenario: a single tracked file inside nested directories
#[test]
fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
    let file = p(b"some/nested/path");
    let mut dmap = OwningDirstateMap::new_empty(vec![]);
    assert_eq!(dmap.len(), 0);

    // Tracking the file creates it and its two ancestor directories
    dmap.set_tracked(file)?;

    assert_eq!(dmap.len(), 1);
    assert_eq!(tracked_descendants(&dmap, b"some"), 1);
    assert_eq!(tracked_descendants(&dmap, b"some/nested"), 1);
    assert_eq!(tracked_descendants(&dmap, b"some/nested/path"), 0);

    // Untracking it removes everything again, ancestors included
    dmap.set_untracked(file)?;
    assert_eq!(dmap.len(), 0);
    let top_level_dir = dmap.get_map().get_node(p(b"some"))?;
    assert!(top_level_dir.is_none());

    Ok(())
}
1506 1530
/// Test the simple case of all tracked, but multiple files
#[test]
fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
    let mut map = OwningDirstateMap::new_empty(vec![]);

    map.set_tracked(p(b"some/nested/path"))?;
    map.set_tracked(p(b"some/nested/file"))?;
    // one layer without any files to test deletion cascade
    map.set_tracked(p(b"some/other/nested/path"))?;
    map.set_tracked(p(b"root_file"))?;
    map.set_tracked(p(b"some/file"))?;
    map.set_tracked(p(b"some/file2"))?;
    map.set_tracked(p(b"some/file3"))?;

    assert_eq!(map.len(), 7);
    assert_eq!(tracked_descendants(&map, b"some"), 6);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);

    map.set_untracked(p(b"some/nested/path"))?;
    assert_eq!(map.len(), 6);
    assert_eq!(tracked_descendants(&map, b"some"), 5);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);

    map.set_untracked(p(b"some/nested/file"))?;
    assert_eq!(map.len(), 5);
    assert_eq!(tracked_descendants(&map, b"some"), 4);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
    // Both files of `some/nested` are gone, so the directory node itself
    // must have been removed by the deletion cascade.
    assert_does_not_exist(&map, b"some/nested");

    map.set_untracked(p(b"some/other/nested/path"))?;
    assert_eq!(map.len(), 4);
    assert_eq!(tracked_descendants(&map, b"some"), 3);
    assert_does_not_exist(&map, b"some/other");

    map.set_untracked(p(b"root_file"))?;
    assert_eq!(map.len(), 3);
    assert_eq!(tracked_descendants(&map, b"some"), 3);
    assert_does_not_exist(&map, b"root_file");

    map.set_untracked(p(b"some/file"))?;
    assert_eq!(map.len(), 2);
    assert_eq!(tracked_descendants(&map, b"some"), 2);
    assert_does_not_exist(&map, b"some/file");

    map.set_untracked(p(b"some/file2"))?;
    assert_eq!(map.len(), 1);
    assert_eq!(tracked_descendants(&map, b"some"), 1);
    assert_does_not_exist(&map, b"some/file2");

    map.set_untracked(p(b"some/file3"))?;
    assert_eq!(map.len(), 0);
    assert_does_not_exist(&map, b"some/file3");

    Ok(())
}
1568 1592
/// Counters must stay correct with a mix of tracked and non-tracked items
#[test]
fn test_tracked_descendants_different() -> Result<(), DirstateError> {
    let mut dmap = OwningDirstateMap::new_empty(vec![]);

    // Freshly added file
    dmap.set_tracked(p(b"some/nested/path"))?;
    // Carries no information at all: the dirstate should ignore it
    dmap.reset_state(p(b"some/file"), false, false, false, false, None)?;
    assert_does_not_exist(&dmap, b"some/file");

    // Removed file
    dmap.reset_state(
        p(b"some/nested/file"),
        false,
        true,
        false,
        false,
        None,
    )?;
    assert!(!dmap.get(p(b"some/nested/file"))?.unwrap().tracked());
    // File only present in p2
    dmap.reset_state(p(b"some/file3"), false, false, true, false, None)?;
    assert!(!dmap.get(p(b"some/file3"))?.unwrap().tracked());
    // Merged file
    dmap.reset_state(p(b"root_file"), true, true, true, false, None)?;
    assert!(dmap.get(p(b"root_file"))?.unwrap().tracked());
    // Added file that also has info from p2
    // XXX is that actually possible?
    dmap.reset_state(p(b"some/file2"), true, false, true, false, None)?;
    assert!(dmap.get(p(b"some/file2"))?.unwrap().tracked());
    // Clean file, with one layer that holds no files of its own in order
    // to test the deletion cascade
    dmap.reset_state(
        p(b"some/other/nested/path"),
        true,
        true,
        false,
        false,
        None,
    )?;
    assert!(dmap.get(p(b"some/other/nested/path"))?.unwrap().tracked());

    assert_eq!(dmap.len(), 6);
    assert_eq!(tracked_descendants(&dmap, b"some"), 3);
    assert_eq!(descendants_with_an_entry(&dmap, b"some"), 5);
    assert_eq!(tracked_descendants(&dmap, b"some/other/nested"), 1);
    assert_eq!(descendants_with_an_entry(&dmap, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&dmap, b"some/other/nested/path"), 0);
    assert_eq!(
        descendants_with_an_entry(&dmap, b"some/other/nested/path"),
        0
    );
    assert_eq!(tracked_descendants(&dmap, b"some/nested"), 1);
    assert_eq!(descendants_with_an_entry(&dmap, b"some/nested"), 2);

    // Untracking an unknown path must not change anything
    dmap.set_untracked(p(b"path/does/not/exist"))?;
    assert_eq!(dmap.len(), 6);

    dmap.set_untracked(p(b"some/other/nested/path"))?;
    // Now untracked, but kept around since it held other information
    assert_eq!(dmap.len(), 6);
    assert_eq!(tracked_descendants(&dmap, b"some"), 2);
    assert_eq!(descendants_with_an_entry(&dmap, b"some"), 5);
    assert_eq!(descendants_with_an_entry(&dmap, b"some/other"), 1);
    assert_eq!(descendants_with_an_entry(&dmap, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&dmap, b"some/nested"), 1);
    assert_eq!(descendants_with_an_entry(&dmap, b"some/nested"), 2);

    dmap.set_untracked(p(b"some/nested/path"))?;
    // Now untracked *and* deleted, since it was only added
    assert_eq!(dmap.len(), 5);
    assert_eq!(tracked_descendants(&dmap, b"some"), 1);
    assert_eq!(descendants_with_an_entry(&dmap, b"some"), 4);
    assert_eq!(tracked_descendants(&dmap, b"some/nested"), 0);
    assert_eq!(descendants_with_an_entry(&dmap, b"some/nested"), 1);
    assert_does_not_exist(&dmap, b"some/nested/path");

    dmap.set_untracked(p(b"root_file"))?;
    // Untracked, but still there
    assert_eq!(dmap.len(), 5);
    assert!(dmap.get(p(b"root_file"))?.is_some());

    dmap.set_untracked(p(b"some/file2"))?;
    assert_eq!(dmap.len(), 5);
    assert_eq!(tracked_descendants(&dmap, b"some"), 0);
    assert!(dmap.get(p(b"some/file2"))?.is_some());

    dmap.set_untracked(p(b"some/file3"))?;
    assert_eq!(dmap.len(), 5);
    assert_eq!(tracked_descendants(&dmap, b"some"), 0);
    assert!(dmap.get(p(b"some/file3"))?.is_some());

    Ok(())
}
1665 1689
/// The copy source counter must follow insertions and removals
#[test]
fn test_copy_source() -> Result<(), DirstateError> {
    let clean = p(b"files/clean");
    let from_p2 = p(b"files/from_p2");
    let removed = p(b"removed");
    let added = p(b"files/added");

    let mut dmap = OwningDirstateMap::new_empty(vec![]);

    // Clean file
    dmap.reset_state(clean, true, true, false, false, None)?;
    // Merged file
    dmap.reset_state(from_p2, true, true, true, false, None)?;
    // Removed file
    dmap.reset_state(removed, false, true, false, false, None)?;
    // Added file
    dmap.reset_state(added, true, false, false, false, None)?;
    // First copy
    dmap.copy_map_insert(clean, p(b"clean_copy_source"))?;
    assert_eq!(dmap.copy_map_len(), 1);

    // Overriding an existing copy does not change the count
    dmap.copy_map_insert(clean, p(b"other_clean_copy_source"))?;
    assert_eq!(dmap.copy_map_len(), 1);

    // Several copies at once
    dmap.copy_map_insert(removed, p(b"removed_copy_source"))?;
    assert_eq!(dmap.copy_map_len(), 2);

    dmap.copy_map_insert(added, p(b"added_copy_source"))?;
    assert_eq!(dmap.copy_map_len(), 3);

    // Added: the entry is removed entirely, along with its copy
    dmap.set_untracked(added)?;
    assert_does_not_exist(&dmap, b"files/added");
    assert_eq!(dmap.copy_map_len(), 2);

    // Removed: the entry stays, and so does its copy
    dmap.set_untracked(removed)?;
    assert!(dmap.get(removed)?.is_some());
    assert_eq!(dmap.copy_map_len(), 2);

    // Clean: the entry stays, but its copy does not
    dmap.set_untracked(clean)?;
    assert!(dmap.get(clean)?.is_some());
    assert_eq!(dmap.copy_map_len(), 1);

    dmap.copy_map_insert(from_p2, p(b"from_p2_copy_source"))?;
    assert_eq!(dmap.copy_map_len(), 2);

    // Info from p2: the copy source is kept
    dmap.set_untracked(from_p2)?;
    assert!(dmap.get(from_p2)?.is_some());
    assert_eq!(dmap.copy_map_len(), 2);

    Ok(())
}
1719 1743
/// Exercise a map backed by "on disk" data. The data is produced in memory
/// by this very test rather than read from an actual disk, but that makes
/// no difference to the code under test.
#[test]
fn test_on_disk() -> Result<(), DirstateError> {
    // Step one: build the data that will play the role of the disk
    let mut dmap = OwningDirstateMap::new_empty(vec![]);

    // Freshly added file
    dmap.set_tracked(p(b"some/nested/added"))?;
    dmap.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;

    // Removed file
    dmap.reset_state(
        p(b"some/nested/removed"),
        false,
        true,
        false,
        false,
        None,
    )?;
    // File only present in p2
    dmap.reset_state(
        p(b"other/p2_info_only"),
        false,
        false,
        true,
        false,
        None,
    )?;
    dmap.copy_map_insert(
        p(b"other/p2_info_only"),
        p(b"other/p2_info_copy_source"),
    )?;
    // Merged file
    dmap.reset_state(p(b"merged"), true, true, true, false, None)?;
    // Added file that also has info from p2
    // XXX is that actually possible?
    dmap.reset_state(
        p(b"other/added_with_p2"),
        true,
        false,
        true,
        false,
        None,
    )?;
    // Clean file, with one layer that holds no files of its own in order
    // to test the deletion cascade
    dmap.reset_state(
        p(b"some/other/nested/clean"),
        true,
        true,
        false,
        false,
        None,
    )?;

    let (packed, metadata, _should_append) = dmap.pack_v2(false)?;
    let packed_len = packed.len();
    assert!(packed_len > 0);

    // Step two: load the map back "from disk"
    let mut dmap = OwningDirstateMap::new_v2(
        packed,
        packed_len,
        metadata.as_bytes(),
    )?;

    // Nothing may have been lost in the round trip
    assert!(dmap.contains_key(p(b"some/nested/added"))?);
    assert!(dmap.contains_key(p(b"some/nested/removed"))?);
    assert!(dmap.contains_key(p(b"merged"))?);
    assert!(dmap.contains_key(p(b"other/p2_info_only"))?);
    assert!(dmap.contains_key(p(b"other/added_with_p2"))?);
    assert!(dmap.contains_key(p(b"some/other/nested/clean"))?);
    assert_eq!(
        dmap.copy_map_get(p(b"some/nested/added"))?,
        Some(p(b"added_copy_source"))
    );
    assert_eq!(
        dmap.copy_map_get(p(b"other/p2_info_only"))?,
        Some(p(b"other/p2_info_copy_source"))
    );
    assert_eq!(tracked_descendants(&dmap, b"some"), 2);
    assert_eq!(descendants_with_an_entry(&dmap, b"some"), 3);
    assert_eq!(tracked_descendants(&dmap, b"other"), 1);
    assert_eq!(descendants_with_an_entry(&dmap, b"other"), 2);
    assert_eq!(tracked_descendants(&dmap, b"some/other"), 1);
    assert_eq!(descendants_with_an_entry(&dmap, b"some/other"), 1);
    assert_eq!(tracked_descendants(&dmap, b"some/other/nested"), 1);
    assert_eq!(descendants_with_an_entry(&dmap, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&dmap, b"some/nested"), 1);
    assert_eq!(descendants_with_an_entry(&dmap, b"some/nested"), 2);
    assert_eq!(dmap.len(), 6);
    assert_eq!(dmap.get_map().unreachable_bytes, 0);
    assert_eq!(dmap.copy_map_len(), 2);

    // Already not tracked, so this must be a no-op
    dmap.set_untracked(p(b"some/nested/removed"))?;
    assert_eq!(dmap.get_map().unreachable_bytes, 0);

    assert!(
        !matches!(dmap.get_map().root, ChildNodes::InMemory(_)),
        "root should not have been mutated"
    );
    // Nothing was mutated at all, so the append strategy must still be the
    // one that is picked
    assert!(dmap.get_map().write_should_append());

    // This one does mutate the structure, which makes some bytes
    // unreachable
    assert!(dmap.set_untracked(p(b"some/nested/added"))?);
    let unreachable_bytes = dmap.get_map().unreachable_bytes;
    assert!(unreachable_bytes > 0);

    assert!(
        !matches!(dmap.get_map().root, ChildNodes::OnDisk(_)),
        "root should have been mutated"
    );

    // No further structural mutation expected here: `root` and its direct
    // children have already been mutated.
    dmap.set_untracked(p(b"merged"))?;
    assert_eq!(dmap.get_map().unreachable_bytes, unreachable_bytes);

    assert!(
        !matches!(
            dmap.get_map().get_node(p(b"other/added_with_p2"))?.unwrap(),
            NodeRef::InMemory(_, _)
        ),
        "'other/added_with_p2' should not have been mutated"
    );
    // This path is different from `<root>some/nested/add`, so this time
    // the structure is mutated
    dmap.set_untracked(p(b"other/added_with_p2"))?;
    assert!(dmap.get_map().unreachable_bytes > unreachable_bytes);

    assert!(
        !matches!(
            dmap.get_map().get_node(p(b"other/added_with_p2"))?.unwrap(),
            NodeRef::OnDisk(_)
        ),
        "'other/added_with_p2' should have been mutated"
    );

    // Most of the tree has been rewritten: a new file should be created
    assert!(!dmap.get_map().write_should_append());

    Ok(())
}
1869 1893 }
@@ -1,846 +1,839 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 use crate::dirstate_tree::dirstate_map::NodeData;
8 7 use crate::dirstate_tree::dirstate_map::NodeRef;
9 8 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 9 use crate::matchers::get_ignore_function;
11 10 use crate::matchers::Matcher;
12 11 use crate::utils::files::get_bytes_from_os_string;
13 12 use crate::utils::files::get_path_from_bytes;
14 13 use crate::utils::hg_path::HgPath;
15 14 use crate::BadMatch;
16 15 use crate::DirstateStatus;
17 16 use crate::EntryState;
18 17 use crate::HgPathBuf;
19 18 use crate::HgPathCow;
20 19 use crate::PatternFileWarning;
21 20 use crate::StatusError;
22 21 use crate::StatusOptions;
23 22 use micro_timer::timed;
24 23 use rayon::prelude::*;
25 24 use sha1::{Digest, Sha1};
26 25 use std::borrow::Cow;
27 26 use std::io;
28 27 use std::path::Path;
29 28 use std::path::PathBuf;
30 29 use std::sync::Mutex;
31 30 use std::time::SystemTime;
32 31
33 32 /// Returns the status of the working directory compared to its parent
34 33 /// changeset.
35 34 ///
36 35 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 36 /// and variable names) and dirstate tree at the same time. The core of this
38 37 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 38 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 39 /// exists in one of the two trees, depending on information requested by
41 40 /// `options` we may need to traverse the remaining subtree.
42 41 #[timed]
43 42 pub fn status<'dirstate>(
44 43 dmap: &'dirstate mut DirstateMap,
45 44 matcher: &(dyn Matcher + Sync),
46 45 root_dir: PathBuf,
47 46 ignore_files: Vec<PathBuf>,
48 47 options: StatusOptions,
49 48 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
50 49 {
51 50 // Force the global rayon threadpool to not exceed 16 concurrent threads.
52 51 // This is a stop-gap measure until we figure out why using more than 16
53 52 // threads makes `status` slower for each additional thread.
54 53 // We use `ok()` in case the global threadpool has already been
55 54 // instantiated in `rhg` or some other caller.
56 55 // TODO find the underlying cause and fix it, then remove this.
57 56 rayon::ThreadPoolBuilder::new()
58 57 .num_threads(16)
59 58 .build_global()
60 59 .ok();
61 60
62 61 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
63 62 if options.list_ignored || options.list_unknown {
64 63 let mut hasher = Sha1::new();
65 64 let (ignore_fn, warnings) = get_ignore_function(
66 65 ignore_files,
67 66 &root_dir,
68 67 &mut |pattern_bytes| hasher.update(pattern_bytes),
69 68 )?;
70 69 let new_hash = *hasher.finalize().as_ref();
71 70 let changed = new_hash != dmap.ignore_patterns_hash;
72 71 dmap.ignore_patterns_hash = new_hash;
73 72 (ignore_fn, warnings, Some(changed))
74 73 } else {
75 74 (Box::new(|&_| true), vec![], None)
76 75 };
77 76
78 77 let filesystem_time_at_status_start =
79 78 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
80 79
81 80 // If the repository is under the current directory, prefer using a
82 81 // relative path, so the kernel needs to traverse fewer directory in every
83 82 // call to `read_dir` or `symlink_metadata`.
84 83 // This is effective in the common case where the current directory is the
85 84 // repository root.
86 85
87 86 // TODO: Better yet would be to use libc functions like `openat` and
88 87 // `fstatat` to remove such repeated traversals entirely, but the standard
89 88 // library does not provide APIs based on those.
90 89 // Maybe with a crate like https://crates.io/crates/openat instead?
91 90 let root_dir = if let Some(relative) = std::env::current_dir()
92 91 .ok()
93 92 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
94 93 {
95 94 relative
96 95 } else {
97 96 &root_dir
98 97 };
99 98
100 99 let outcome = DirstateStatus {
101 100 filesystem_time_at_status_start,
102 101 ..Default::default()
103 102 };
104 103 let common = StatusCommon {
105 104 dmap,
106 105 options,
107 106 matcher,
108 107 ignore_fn,
109 108 outcome: Mutex::new(outcome),
110 109 ignore_patterns_have_changed: patterns_changed,
111 110 new_cachable_directories: Default::default(),
112 111 outated_cached_directories: Default::default(),
113 112 filesystem_time_at_status_start,
114 113 };
115 114 let is_at_repo_root = true;
116 115 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
117 116 let has_ignored_ancestor = false;
118 117 let root_cached_mtime = None;
119 118 let root_dir_metadata = None;
120 119 // If the path we have for the repository root is a symlink, do follow it.
121 120 // (As opposed to symlinks within the working directory which are not
122 121 // followed, using `std::fs::symlink_metadata`.)
123 122 common.traverse_fs_directory_and_dirstate(
124 123 has_ignored_ancestor,
125 124 dmap.root.as_ref(),
126 125 hg_path,
127 126 &root_dir,
128 127 root_dir_metadata,
129 128 root_cached_mtime,
130 129 is_at_repo_root,
131 130 )?;
132 131 let mut outcome = common.outcome.into_inner().unwrap();
133 132 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
134 133 let outdated = common.outated_cached_directories.into_inner().unwrap();
135 134
136 135 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
137 136 || !outdated.is_empty()
138 137 || !new_cachable.is_empty();
139 138
140 139 // Remove outdated mtimes before adding new mtimes, in case a given
141 140 // directory is both
142 141 for path in &outdated {
143 142 dmap.clear_cached_mtime(path)?;
144 143 }
145 144 for (path, mtime) in &new_cachable {
146 let node = dmap.get_or_insert(path)?;
147 match &node.data {
148 NodeData::Entry(_) => {} // Don’t overwrite an entry
149 NodeData::CachedDirectory { .. } | NodeData::None => {
150 node.data = NodeData::CachedDirectory { mtime: *mtime }
151 }
152 }
145 dmap.set_cached_mtime(path, *mtime)?;
153 146 }
154 147
155 148 Ok((outcome, warnings))
156 149 }
157 150
158 151 /// Bag of random things needed by various parts of the algorithm. Reduces the
159 152 /// number of parameters passed to functions.
160 153 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
161 154 dmap: &'tree DirstateMap<'on_disk>,
162 155 options: StatusOptions,
163 156 matcher: &'a (dyn Matcher + Sync),
164 157 ignore_fn: IgnoreFnType<'a>,
165 158 outcome: Mutex<DirstateStatus<'on_disk>>,
166 159 new_cachable_directories:
167 160 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
168 161 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
169 162
170 163 /// Whether ignore files like `.hgignore` have changed since the previous
171 164 /// time a `status()` call wrote their hash to the dirstate. `None` means
172 165 /// we don’t know as this run doesn’t list either ignored or uknown files
173 166 /// and therefore isn’t reading `.hgignore`.
174 167 ignore_patterns_have_changed: Option<bool>,
175 168
176 169 /// The current time at the start of the `status()` algorithm, as measured
177 170 /// and possibly truncated by the filesystem.
178 171 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
179 172 }
180 173
/// Selects which list of the `DirstateStatus` result a path is pushed to.
enum Outcome {
    /// Goes to the `modified` list.
    Modified,
    /// Goes to the `added` list.
    Added,
    /// Goes to the `removed` list.
    Removed,
    /// Goes to the `deleted` list.
    Deleted,
    /// Goes to the `clean` list.
    Clean,
    /// Goes to the `ignored` list.
    Ignored,
    /// Goes to the `unknown` list.
    Unknown,
    /// Goes to the `unsure` list.
    Unsure,
}
191 184
192 185 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
193 186 fn push_outcome(
194 187 &self,
195 188 which: Outcome,
196 189 dirstate_node: &NodeRef<'tree, 'on_disk>,
197 190 ) -> Result<(), DirstateV2ParseError> {
198 191 let path = dirstate_node
199 192 .full_path_borrowed(self.dmap.on_disk)?
200 193 .detach_from_tree();
201 194 let copy_source = if self.options.list_copies {
202 195 dirstate_node
203 196 .copy_source_borrowed(self.dmap.on_disk)?
204 197 .map(|source| source.detach_from_tree())
205 198 } else {
206 199 None
207 200 };
208 201 self.push_outcome_common(which, path, copy_source);
209 202 Ok(())
210 203 }
211 204
212 205 fn push_outcome_without_copy_source(
213 206 &self,
214 207 which: Outcome,
215 208 path: &BorrowedPath<'_, 'on_disk>,
216 209 ) {
217 210 self.push_outcome_common(which, path.detach_from_tree(), None)
218 211 }
219 212
220 213 fn push_outcome_common(
221 214 &self,
222 215 which: Outcome,
223 216 path: HgPathCow<'on_disk>,
224 217 copy_source: Option<HgPathCow<'on_disk>>,
225 218 ) {
226 219 let mut outcome = self.outcome.lock().unwrap();
227 220 let vec = match which {
228 221 Outcome::Modified => &mut outcome.modified,
229 222 Outcome::Added => &mut outcome.added,
230 223 Outcome::Removed => &mut outcome.removed,
231 224 Outcome::Deleted => &mut outcome.deleted,
232 225 Outcome::Clean => &mut outcome.clean,
233 226 Outcome::Ignored => &mut outcome.ignored,
234 227 Outcome::Unknown => &mut outcome.unknown,
235 228 Outcome::Unsure => &mut outcome.unsure,
236 229 };
237 230 vec.push(StatusPath { path, copy_source });
238 231 }
239 232
240 233 fn read_dir(
241 234 &self,
242 235 hg_path: &HgPath,
243 236 fs_path: &Path,
244 237 is_at_repo_root: bool,
245 238 ) -> Result<Vec<DirEntry>, ()> {
246 239 DirEntry::read_dir(fs_path, is_at_repo_root)
247 240 .map_err(|error| self.io_error(error, hg_path))
248 241 }
249 242
250 243 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
251 244 let errno = error.raw_os_error().expect("expected real OS error");
252 245 self.outcome
253 246 .lock()
254 247 .unwrap()
255 248 .bad
256 249 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
257 250 }
258 251
259 252 fn check_for_outdated_directory_cache(
260 253 &self,
261 254 dirstate_node: &NodeRef<'tree, 'on_disk>,
262 255 ) -> Result<(), DirstateV2ParseError> {
263 256 if self.ignore_patterns_have_changed == Some(true)
264 257 && dirstate_node.cached_directory_mtime()?.is_some()
265 258 {
266 259 self.outated_cached_directories.lock().unwrap().push(
267 260 dirstate_node
268 261 .full_path_borrowed(self.dmap.on_disk)?
269 262 .detach_from_tree(),
270 263 )
271 264 }
272 265 Ok(())
273 266 }
274 267
275 268 /// If this returns true, we can get accurate results by only using
276 269 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
277 270 /// need to call `read_dir`.
278 271 fn can_skip_fs_readdir(
279 272 &self,
280 273 directory_metadata: Option<&std::fs::Metadata>,
281 274 cached_directory_mtime: Option<TruncatedTimestamp>,
282 275 ) -> bool {
283 276 if !self.options.list_unknown && !self.options.list_ignored {
284 277 // All states that we care about listing have corresponding
285 278 // dirstate entries.
286 279 // This happens for example with `hg status -mard`.
287 280 return true;
288 281 }
289 282 if !self.options.list_ignored
290 283 && self.ignore_patterns_have_changed == Some(false)
291 284 {
292 285 if let Some(cached_mtime) = cached_directory_mtime {
293 286 // The dirstate contains a cached mtime for this directory, set
294 287 // by a previous run of the `status` algorithm which found this
295 288 // directory eligible for `read_dir` caching.
296 289 if let Some(meta) = directory_metadata {
297 290 if cached_mtime
298 291 .likely_equal_to_mtime_of(meta)
299 292 .unwrap_or(false)
300 293 {
301 294 // The mtime of that directory has not changed
302 295 // since then, which means that the results of
303 296 // `read_dir` should also be unchanged.
304 297 return true;
305 298 }
306 299 }
307 300 }
308 301 }
309 302 false
310 303 }
311 304
312 305 /// Returns whether all child entries of the filesystem directory have a
313 306 /// corresponding dirstate node or are ignored.
314 307 fn traverse_fs_directory_and_dirstate(
315 308 &self,
316 309 has_ignored_ancestor: bool,
317 310 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
318 311 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
319 312 directory_fs_path: &Path,
320 313 directory_metadata: Option<&std::fs::Metadata>,
321 314 cached_directory_mtime: Option<TruncatedTimestamp>,
322 315 is_at_repo_root: bool,
323 316 ) -> Result<bool, DirstateV2ParseError> {
324 317 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
325 318 {
326 319 dirstate_nodes
327 320 .par_iter()
328 321 .map(|dirstate_node| {
329 322 let fs_path = directory_fs_path.join(get_path_from_bytes(
330 323 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
331 324 ));
332 325 match std::fs::symlink_metadata(&fs_path) {
333 326 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
334 327 &fs_path,
335 328 &fs_metadata,
336 329 dirstate_node,
337 330 has_ignored_ancestor,
338 331 ),
339 332 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
340 333 self.traverse_dirstate_only(dirstate_node)
341 334 }
342 335 Err(error) => {
343 336 let hg_path =
344 337 dirstate_node.full_path(self.dmap.on_disk)?;
345 338 Ok(self.io_error(error, hg_path))
346 339 }
347 340 }
348 341 })
349 342 .collect::<Result<_, _>>()?;
350 343
351 344 // We don’t know, so conservatively say this isn’t the case
352 345 let children_all_have_dirstate_node_or_are_ignored = false;
353 346
354 347 return Ok(children_all_have_dirstate_node_or_are_ignored);
355 348 }
356 349
357 350 let mut fs_entries = if let Ok(entries) = self.read_dir(
358 351 directory_hg_path,
359 352 directory_fs_path,
360 353 is_at_repo_root,
361 354 ) {
362 355 entries
363 356 } else {
364 357 // Treat an unreadable directory (typically because of insufficient
365 358 // permissions) like an empty directory. `self.read_dir` has
366 359 // already called `self.io_error` so a warning will be emitted.
367 360 Vec::new()
368 361 };
369 362
370 363 // `merge_join_by` requires both its input iterators to be sorted:
371 364
372 365 let dirstate_nodes = dirstate_nodes.sorted();
373 366 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
374 367 // https://github.com/rust-lang/rust/issues/34162
375 368 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
376 369
377 370 // Propagate here any error that would happen inside the comparison
378 371 // callback below
379 372 for dirstate_node in &dirstate_nodes {
380 373 dirstate_node.base_name(self.dmap.on_disk)?;
381 374 }
382 375 itertools::merge_join_by(
383 376 dirstate_nodes,
384 377 &fs_entries,
385 378 |dirstate_node, fs_entry| {
386 379 // This `unwrap` never panics because we already propagated
387 380 // those errors above
388 381 dirstate_node
389 382 .base_name(self.dmap.on_disk)
390 383 .unwrap()
391 384 .cmp(&fs_entry.base_name)
392 385 },
393 386 )
394 387 .par_bridge()
395 388 .map(|pair| {
396 389 use itertools::EitherOrBoth::*;
397 390 let has_dirstate_node_or_is_ignored;
398 391 match pair {
399 392 Both(dirstate_node, fs_entry) => {
400 393 self.traverse_fs_and_dirstate(
401 394 &fs_entry.full_path,
402 395 &fs_entry.metadata,
403 396 dirstate_node,
404 397 has_ignored_ancestor,
405 398 )?;
406 399 has_dirstate_node_or_is_ignored = true
407 400 }
408 401 Left(dirstate_node) => {
409 402 self.traverse_dirstate_only(dirstate_node)?;
410 403 has_dirstate_node_or_is_ignored = true;
411 404 }
412 405 Right(fs_entry) => {
413 406 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
414 407 has_ignored_ancestor,
415 408 directory_hg_path,
416 409 fs_entry,
417 410 )
418 411 }
419 412 }
420 413 Ok(has_dirstate_node_or_is_ignored)
421 414 })
422 415 .try_reduce(|| true, |a, b| Ok(a && b))
423 416 }
424 417
425 418 fn traverse_fs_and_dirstate(
426 419 &self,
427 420 fs_path: &Path,
428 421 fs_metadata: &std::fs::Metadata,
429 422 dirstate_node: NodeRef<'tree, 'on_disk>,
430 423 has_ignored_ancestor: bool,
431 424 ) -> Result<(), DirstateV2ParseError> {
432 425 self.check_for_outdated_directory_cache(&dirstate_node)?;
433 426 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
434 427 let file_type = fs_metadata.file_type();
435 428 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
436 429 if !file_or_symlink {
437 430 // If we previously had a file here, it was removed (with
438 431 // `hg rm` or similar) or deleted before it could be
439 432 // replaced by a directory or something else.
440 433 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
441 434 }
442 435 if file_type.is_dir() {
443 436 if self.options.collect_traversed_dirs {
444 437 self.outcome
445 438 .lock()
446 439 .unwrap()
447 440 .traversed
448 441 .push(hg_path.detach_from_tree())
449 442 }
450 443 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
451 444 let is_at_repo_root = false;
452 445 let children_all_have_dirstate_node_or_are_ignored = self
453 446 .traverse_fs_directory_and_dirstate(
454 447 is_ignored,
455 448 dirstate_node.children(self.dmap.on_disk)?,
456 449 hg_path,
457 450 fs_path,
458 451 Some(fs_metadata),
459 452 dirstate_node.cached_directory_mtime()?,
460 453 is_at_repo_root,
461 454 )?;
462 455 self.maybe_save_directory_mtime(
463 456 children_all_have_dirstate_node_or_are_ignored,
464 457 fs_metadata,
465 458 dirstate_node,
466 459 )?
467 460 } else {
468 461 if file_or_symlink && self.matcher.matches(hg_path) {
469 462 if let Some(state) = dirstate_node.state()? {
470 463 match state {
471 464 EntryState::Added => {
472 465 self.push_outcome(Outcome::Added, &dirstate_node)?
473 466 }
474 467 EntryState::Removed => self
475 468 .push_outcome(Outcome::Removed, &dirstate_node)?,
476 469 EntryState::Merged => self
477 470 .push_outcome(Outcome::Modified, &dirstate_node)?,
478 471 EntryState::Normal => self
479 472 .handle_normal_file(&dirstate_node, fs_metadata)?,
480 473 }
481 474 } else {
482 475 // `node.entry.is_none()` indicates a "directory"
483 476 // node, but the filesystem has a file
484 477 self.mark_unknown_or_ignored(
485 478 has_ignored_ancestor,
486 479 hg_path,
487 480 );
488 481 }
489 482 }
490 483
491 484 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
492 485 {
493 486 self.traverse_dirstate_only(child_node)?
494 487 }
495 488 }
496 489 Ok(())
497 490 }
498 491
499 492 fn maybe_save_directory_mtime(
500 493 &self,
501 494 children_all_have_dirstate_node_or_are_ignored: bool,
502 495 directory_metadata: &std::fs::Metadata,
503 496 dirstate_node: NodeRef<'tree, 'on_disk>,
504 497 ) -> Result<(), DirstateV2ParseError> {
505 498 if !children_all_have_dirstate_node_or_are_ignored {
506 499 return Ok(());
507 500 }
508 501 // All filesystem directory entries from `read_dir` have a
509 502 // corresponding node in the dirstate, so we can reconstitute the
510 503 // names of those entries without calling `read_dir` again.
511 504
512 505 // TODO: use let-else here and below when available:
513 506 // https://github.com/rust-lang/rust/issues/87335
514 507 let status_start = if let Some(status_start) =
515 508 &self.filesystem_time_at_status_start
516 509 {
517 510 status_start
518 511 } else {
519 512 return Ok(());
520 513 };
521 514
522 515 // Although the Rust standard library’s `SystemTime` type
523 516 // has nanosecond precision, the times reported for a
524 517 // directory’s (or file’s) modified time may have lower
525 518 // resolution based on the filesystem (for example ext3
526 519 // only stores integer seconds), kernel (see
527 520 // https://stackoverflow.com/a/14393315/1162888), etc.
528 521 let directory_mtime = if let Ok(option) =
529 522 TruncatedTimestamp::for_reliable_mtime_of(
530 523 directory_metadata,
531 524 status_start,
532 525 ) {
533 526 if let Some(directory_mtime) = option {
534 527 directory_mtime
535 528 } else {
536 529 // The directory was modified too recently,
537 530 // don’t cache its `read_dir` results.
538 531 //
539 532 // 1. A change to this directory (direct child was
540 533 // added or removed) cause its mtime to be set
541 534 // (possibly truncated) to `directory_mtime`
542 535 // 2. This `status` algorithm calls `read_dir`
543 536 // 3. An other change is made to the same directory is
544 537 // made so that calling `read_dir` agin would give
545 538 // different results, but soon enough after 1. that
546 539 // the mtime stays the same
547 540 //
548 541 // On a system where the time resolution poor, this
549 542 // scenario is not unlikely if all three steps are caused
550 543 // by the same script.
551 544 return Ok(());
552 545 }
553 546 } else {
554 547 // OS/libc does not support mtime?
555 548 return Ok(());
556 549 };
557 550 // We’ve observed (through `status_start`) that time has
558 551 // “progressed” since `directory_mtime`, so any further
559 552 // change to this directory is extremely likely to cause a
560 553 // different mtime.
561 554 //
562 555 // Having the same mtime again is not entirely impossible
563 556 // since the system clock is not monotonous. It could jump
564 557 // backward to some point before `directory_mtime`, then a
565 558 // directory change could potentially happen during exactly
566 559 // the wrong tick.
567 560 //
568 561 // We deem this scenario (unlike the previous one) to be
569 562 // unlikely enough in practice.
570 563
571 564 let is_up_to_date =
572 565 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
573 566 cached.likely_equal(directory_mtime)
574 567 } else {
575 568 false
576 569 };
577 570 if !is_up_to_date {
578 571 let hg_path = dirstate_node
579 572 .full_path_borrowed(self.dmap.on_disk)?
580 573 .detach_from_tree();
581 574 self.new_cachable_directories
582 575 .lock()
583 576 .unwrap()
584 577 .push((hg_path, directory_mtime))
585 578 }
586 579 Ok(())
587 580 }
588 581
589 582 /// A file with `EntryState::Normal` in the dirstate was found in the
590 583 /// filesystem
591 584 fn handle_normal_file(
592 585 &self,
593 586 dirstate_node: &NodeRef<'tree, 'on_disk>,
594 587 fs_metadata: &std::fs::Metadata,
595 588 ) -> Result<(), DirstateV2ParseError> {
596 589 // Keep the low 31 bits
597 590 fn truncate_u64(value: u64) -> i32 {
598 591 (value & 0x7FFF_FFFF) as i32
599 592 }
600 593
601 594 let entry = dirstate_node
602 595 .entry()?
603 596 .expect("handle_normal_file called with entry-less node");
604 597 let mode_changed =
605 598 || self.options.check_exec && entry.mode_changed(fs_metadata);
606 599 let size = entry.size();
607 600 let size_changed = size != truncate_u64(fs_metadata.len());
608 601 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
609 602 // issue6456: Size returned may be longer due to encryption
610 603 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
611 604 self.push_outcome(Outcome::Unsure, dirstate_node)?
612 605 } else if dirstate_node.has_copy_source()
613 606 || entry.is_from_other_parent()
614 607 || (size >= 0 && (size_changed || mode_changed()))
615 608 {
616 609 self.push_outcome(Outcome::Modified, dirstate_node)?
617 610 } else {
618 611 let mtime_looks_clean;
619 612 if let Some(dirstate_mtime) = entry.truncated_mtime() {
620 613 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
621 614 .expect("OS/libc does not support mtime?");
622 615 // There might be a change in the future if for example the
623 616 // internal clock become off while process run, but this is a
624 617 // case where the issues the user would face
625 618 // would be a lot worse and there is nothing we
626 619 // can really do.
627 620 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
628 621 } else {
629 622 // No mtime in the dirstate entry
630 623 mtime_looks_clean = false
631 624 };
632 625 if !mtime_looks_clean {
633 626 self.push_outcome(Outcome::Unsure, dirstate_node)?
634 627 } else if self.options.list_clean {
635 628 self.push_outcome(Outcome::Clean, dirstate_node)?
636 629 }
637 630 }
638 631 Ok(())
639 632 }
640 633
641 634 /// A node in the dirstate tree has no corresponding filesystem entry
642 635 fn traverse_dirstate_only(
643 636 &self,
644 637 dirstate_node: NodeRef<'tree, 'on_disk>,
645 638 ) -> Result<(), DirstateV2ParseError> {
646 639 self.check_for_outdated_directory_cache(&dirstate_node)?;
647 640 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
648 641 dirstate_node
649 642 .children(self.dmap.on_disk)?
650 643 .par_iter()
651 644 .map(|child_node| self.traverse_dirstate_only(child_node))
652 645 .collect()
653 646 }
654 647
655 648 /// A node in the dirstate tree has no corresponding *file* on the
656 649 /// filesystem
657 650 ///
658 651 /// Does nothing on a "directory" node
659 652 fn mark_removed_or_deleted_if_file(
660 653 &self,
661 654 dirstate_node: &NodeRef<'tree, 'on_disk>,
662 655 ) -> Result<(), DirstateV2ParseError> {
663 656 if let Some(state) = dirstate_node.state()? {
664 657 let path = dirstate_node.full_path(self.dmap.on_disk)?;
665 658 if self.matcher.matches(path) {
666 659 if let EntryState::Removed = state {
667 660 self.push_outcome(Outcome::Removed, dirstate_node)?
668 661 } else {
669 662 self.push_outcome(Outcome::Deleted, &dirstate_node)?
670 663 }
671 664 }
672 665 }
673 666 Ok(())
674 667 }
675 668
676 669 /// Something in the filesystem has no corresponding dirstate node
677 670 ///
678 671 /// Returns whether that path is ignored
679 672 fn traverse_fs_only(
680 673 &self,
681 674 has_ignored_ancestor: bool,
682 675 directory_hg_path: &HgPath,
683 676 fs_entry: &DirEntry,
684 677 ) -> bool {
685 678 let hg_path = directory_hg_path.join(&fs_entry.base_name);
686 679 let file_type = fs_entry.metadata.file_type();
687 680 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
688 681 if file_type.is_dir() {
689 682 let is_ignored =
690 683 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
691 684 let traverse_children = if is_ignored {
692 685 // Descendants of an ignored directory are all ignored
693 686 self.options.list_ignored
694 687 } else {
695 688 // Descendants of an unknown directory may be either unknown or
696 689 // ignored
697 690 self.options.list_unknown || self.options.list_ignored
698 691 };
699 692 if traverse_children {
700 693 let is_at_repo_root = false;
701 694 if let Ok(children_fs_entries) = self.read_dir(
702 695 &hg_path,
703 696 &fs_entry.full_path,
704 697 is_at_repo_root,
705 698 ) {
706 699 children_fs_entries.par_iter().for_each(|child_fs_entry| {
707 700 self.traverse_fs_only(
708 701 is_ignored,
709 702 &hg_path,
710 703 child_fs_entry,
711 704 );
712 705 })
713 706 }
714 707 }
715 708 if self.options.collect_traversed_dirs {
716 709 self.outcome.lock().unwrap().traversed.push(hg_path.into())
717 710 }
718 711 is_ignored
719 712 } else {
720 713 if file_or_symlink {
721 714 if self.matcher.matches(&hg_path) {
722 715 self.mark_unknown_or_ignored(
723 716 has_ignored_ancestor,
724 717 &BorrowedPath::InMemory(&hg_path),
725 718 )
726 719 } else {
727 720 // We haven’t computed whether this path is ignored. It
728 721 // might not be, and a future run of status might have a
729 722 // different matcher that matches it. So treat it as not
730 723 // ignored. That is, inhibit readdir caching of the parent
731 724 // directory.
732 725 false
733 726 }
734 727 } else {
735 728 // This is neither a directory, a plain file, or a symlink.
736 729 // Treat it like an ignored file.
737 730 true
738 731 }
739 732 }
740 733 }
741 734
742 735 /// Returns whether that path is ignored
743 736 fn mark_unknown_or_ignored(
744 737 &self,
745 738 has_ignored_ancestor: bool,
746 739 hg_path: &BorrowedPath<'_, 'on_disk>,
747 740 ) -> bool {
748 741 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
749 742 if is_ignored {
750 743 if self.options.list_ignored {
751 744 self.push_outcome_without_copy_source(
752 745 Outcome::Ignored,
753 746 hg_path,
754 747 )
755 748 }
756 749 } else {
757 750 if self.options.list_unknown {
758 751 self.push_outcome_without_copy_source(
759 752 Outcome::Unknown,
760 753 hg_path,
761 754 )
762 755 }
763 756 }
764 757 is_ignored
765 758 }
766 759 }
767 760
768 761 struct DirEntry {
769 762 base_name: HgPathBuf,
770 763 full_path: PathBuf,
771 764 metadata: std::fs::Metadata,
772 765 }
773 766
774 767 impl DirEntry {
775 768 /// Returns **unsorted** entries in the given directory, with name and
776 769 /// metadata.
777 770 ///
778 771 /// If a `.hg` sub-directory is encountered:
779 772 ///
780 773 /// * At the repository root, ignore that sub-directory
781 774 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
782 775 /// list instead.
783 776 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
784 777 // `read_dir` returns a "not found" error for the empty path
785 778 let at_cwd = path == Path::new("");
786 779 let read_dir_path = if at_cwd { Path::new(".") } else { path };
787 780 let mut results = Vec::new();
788 781 for entry in read_dir_path.read_dir()? {
789 782 let entry = entry?;
790 783 let metadata = match entry.metadata() {
791 784 Ok(v) => v,
792 785 Err(e) => {
793 786 // race with file deletion?
794 787 if e.kind() == std::io::ErrorKind::NotFound {
795 788 continue;
796 789 } else {
797 790 return Err(e);
798 791 }
799 792 }
800 793 };
801 794 let file_name = entry.file_name();
802 795 // FIXME don't do this when cached
803 796 if file_name == ".hg" {
804 797 if is_at_repo_root {
805 798 // Skip the repo’s own .hg (might be a symlink)
806 799 continue;
807 800 } else if metadata.is_dir() {
808 801 // A .hg sub-directory at another location means a subrepo,
809 802 // skip it entirely.
810 803 return Ok(Vec::new());
811 804 }
812 805 }
813 806 let full_path = if at_cwd {
814 807 file_name.clone().into()
815 808 } else {
816 809 entry.path()
817 810 };
818 811 let base_name = get_bytes_from_os_string(file_name).into();
819 812 results.push(DirEntry {
820 813 base_name,
821 814 full_path,
822 815 metadata,
823 816 })
824 817 }
825 818 Ok(results)
826 819 }
827 820 }
828 821
829 822 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
830 823 /// of the give repository.
831 824 ///
832 825 /// This is similar to `SystemTime::now()`, with the result truncated to the
833 826 /// same time resolution as other files’ modification times. Using `.hg`
834 827 /// instead of the system’s default temporary directory (such as `/tmp`) makes
835 828 /// it more likely the temporary file is in the same disk partition as contents
836 829 /// of the working directory, which can matter since different filesystems may
837 830 /// store timestamps with different resolutions.
838 831 ///
839 832 /// This may fail, typically if we lack write permissions. In that case we
840 833 /// should continue the `status()` algoritm anyway and consider the current
841 834 /// date/time to be unknown.
842 835 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
843 836 tempfile::tempfile_in(repo_root.join(".hg"))?
844 837 .metadata()?
845 838 .modified()
846 839 }
General Comments 0
You need to be logged in to leave comments. Login now