##// END OF EJS Templates
rust-dirstatemap: add `clear_cached_mtime` helper method...
Raphaël Gomès -
r50018:f3e8b0b0 default
parent child Browse files
Show More
@@ -1,1849 +1,1869 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::path::PathBuf;
5 5
6 6 use super::on_disk;
7 7 use super::on_disk::DirstateV2ParseError;
8 8 use super::owning::OwningDirstateMap;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::CopyMapIter;
14 14 use crate::dirstate::DirstateV2Data;
15 15 use crate::dirstate::ParentFileData;
16 16 use crate::dirstate::StateMapIter;
17 17 use crate::dirstate::TruncatedTimestamp;
18 18 use crate::matchers::Matcher;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::DirstateEntry;
21 21 use crate::DirstateError;
22 22 use crate::DirstateMapError;
23 23 use crate::DirstateParents;
24 24 use crate::DirstateStatus;
25 25 use crate::EntryState;
26 26 use crate::FastHashbrownMap as FastHashMap;
27 27 use crate::PatternFileWarning;
28 28 use crate::StatusError;
29 29 use crate::StatusOptions;
30 30
/// Append to an existing data file if the amount of unreachable data (not used
/// anymore) is less than this fraction of the total amount of existing data.
///
/// This threshold is compared against the ratio computed in
/// `DirstateMap::write_should_append`.
const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
34 34
/// A dirstate organised as a tree of nodes that are looked up one path
/// component at a time.
///
/// Child nodes are either held in memory or still borrowed from the
/// `on_disk` bytes (see `ChildNodes`).
#[derive(Debug)]
pub struct DirstateMap<'on_disk> {
    /// Contents of the `.hg/dirstate` file
    pub(super) on_disk: &'on_disk [u8],

    /// Top-level nodes of the tree; every lookup starts here.
    pub(super) root: ChildNodes<'on_disk>,

    /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
    pub(super) nodes_with_entry_count: u32,

    /// Number of nodes anywhere in the tree that have
    /// `.copy_source.is_some()`.
    pub(super) nodes_with_copy_source_count: u32,

    /// See on_disk::Header
    pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,

    /// How many bytes of `on_disk` are not used anymore
    pub(super) unreachable_bytes: u32,
}
55 55
/// Key type of the in-memory map of child nodes (see
/// `ChildNodes::InMemory`).
///
/// Using a plain `HgPathBuf` of the full path from the repository root as a
/// map key would also work: all paths in a given map have the same parent
/// path, so comparing full paths gives the same result as comparing base
/// names. However `HashMap` would waste time always re-hashing the same
/// string prefix.
pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
62 62
/// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
/// for on-disk nodes that don't actually have a `Cow` to borrow.
#[derive(Debug)]
pub(super) enum BorrowedPath<'tree, 'on_disk> {
    /// Path owned by the in-memory tree, borrowed for `'tree`.
    InMemory(&'tree HgPathBuf),
    /// Path borrowed for the `'on_disk` lifetime.
    OnDisk(&'on_disk HgPath),
}
70 70
/// The set of children of one node (or of the root of the tree).
#[derive(Debug)]
pub(super) enum ChildNodes<'on_disk> {
    /// Children held in a hash map, which is the only mutable form (see
    /// `ChildNodes::make_mut`).
    InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
    /// Children still borrowed from the on-disk data. Lookups binary-search
    /// this slice by base name.
    OnDisk(&'on_disk [on_disk::Node]),
}
76 76
/// Read-only, borrowed counterpart of `ChildNodes`, as returned by
/// `ChildNodes::as_ref`.
#[derive(Debug)]
pub(super) enum ChildNodesRef<'tree, 'on_disk> {
    /// Borrow of an in-memory map of children.
    InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
    /// Slice of children borrowed from the on-disk data.
    OnDisk(&'on_disk [on_disk::Node]),
}
82 82
/// Read-only reference to a single node, whichever representation it
/// currently has.
#[derive(Debug)]
pub(super) enum NodeRef<'tree, 'on_disk> {
    /// An in-memory node together with the key it is stored under.
    InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
    /// A node borrowed from the on-disk data.
    OnDisk(&'on_disk on_disk::Node),
}
88 88
89 89 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
90 90 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
91 91 match *self {
92 92 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
93 93 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
94 94 }
95 95 }
96 96 }
97 97
98 98 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
99 99 type Target = HgPath;
100 100
101 101 fn deref(&self) -> &HgPath {
102 102 match *self {
103 103 BorrowedPath::InMemory(in_memory) => in_memory,
104 104 BorrowedPath::OnDisk(on_disk) => on_disk,
105 105 }
106 106 }
107 107 }
108 108
109 109 impl Default for ChildNodes<'_> {
110 110 fn default() -> Self {
111 111 ChildNodes::InMemory(Default::default())
112 112 }
113 113 }
114 114
115 115 impl<'on_disk> ChildNodes<'on_disk> {
116 116 pub(super) fn as_ref<'tree>(
117 117 &'tree self,
118 118 ) -> ChildNodesRef<'tree, 'on_disk> {
119 119 match self {
120 120 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
121 121 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
122 122 }
123 123 }
124 124
125 125 pub(super) fn is_empty(&self) -> bool {
126 126 match self {
127 127 ChildNodes::InMemory(nodes) => nodes.is_empty(),
128 128 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
129 129 }
130 130 }
131 131
132 132 fn make_mut(
133 133 &mut self,
134 134 on_disk: &'on_disk [u8],
135 135 unreachable_bytes: &mut u32,
136 136 ) -> Result<
137 137 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
138 138 DirstateV2ParseError,
139 139 > {
140 140 match self {
141 141 ChildNodes::InMemory(nodes) => Ok(nodes),
142 142 ChildNodes::OnDisk(nodes) => {
143 143 *unreachable_bytes +=
144 144 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
145 145 let nodes = nodes
146 146 .iter()
147 147 .map(|node| {
148 148 Ok((
149 149 node.path(on_disk)?,
150 150 node.to_in_memory_node(on_disk)?,
151 151 ))
152 152 })
153 153 .collect::<Result<_, _>>()?;
154 154 *self = ChildNodes::InMemory(nodes);
155 155 match self {
156 156 ChildNodes::InMemory(nodes) => Ok(nodes),
157 157 ChildNodes::OnDisk(_) => unreachable!(),
158 158 }
159 159 }
160 160 }
161 161 }
162 162 }
163 163
164 164 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
165 165 pub(super) fn get(
166 166 &self,
167 167 base_name: &HgPath,
168 168 on_disk: &'on_disk [u8],
169 169 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
170 170 match self {
171 171 ChildNodesRef::InMemory(nodes) => Ok(nodes
172 172 .get_key_value(base_name)
173 173 .map(|(k, v)| NodeRef::InMemory(k, v))),
174 174 ChildNodesRef::OnDisk(nodes) => {
175 175 let mut parse_result = Ok(());
176 176 let search_result = nodes.binary_search_by(|node| {
177 177 match node.base_name(on_disk) {
178 178 Ok(node_base_name) => node_base_name.cmp(base_name),
179 179 Err(e) => {
180 180 parse_result = Err(e);
181 181 // Dummy comparison result, `search_result` won’t
182 182 // be used since `parse_result` is an error
183 183 std::cmp::Ordering::Equal
184 184 }
185 185 }
186 186 });
187 187 parse_result.map(|()| {
188 188 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
189 189 })
190 190 }
191 191 }
192 192 }
193 193
194 194 /// Iterate in undefined order
195 195 pub(super) fn iter(
196 196 &self,
197 197 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
198 198 match self {
199 199 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
200 200 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
201 201 ),
202 202 ChildNodesRef::OnDisk(nodes) => {
203 203 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
204 204 }
205 205 }
206 206 }
207 207
208 208 /// Iterate in parallel in undefined order
209 209 pub(super) fn par_iter(
210 210 &self,
211 211 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
212 212 {
213 213 use rayon::prelude::*;
214 214 match self {
215 215 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
216 216 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
217 217 ),
218 218 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
219 219 nodes.par_iter().map(NodeRef::OnDisk),
220 220 ),
221 221 }
222 222 }
223 223
224 224 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
225 225 match self {
226 226 ChildNodesRef::InMemory(nodes) => {
227 227 let mut vec: Vec<_> = nodes
228 228 .iter()
229 229 .map(|(k, v)| NodeRef::InMemory(k, v))
230 230 .collect();
231 231 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
232 232 match node {
233 233 NodeRef::InMemory(path, _node) => path.base_name(),
234 234 NodeRef::OnDisk(_) => unreachable!(),
235 235 }
236 236 }
237 237 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
238 238 // value: https://github.com/rust-lang/rust/issues/34162
239 239 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
240 240 vec
241 241 }
242 242 ChildNodesRef::OnDisk(nodes) => {
243 243 // Nodes on disk are already sorted
244 244 nodes.iter().map(NodeRef::OnDisk).collect()
245 245 }
246 246 }
247 247 }
248 248 }
249 249
250 250 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
251 251 pub(super) fn full_path(
252 252 &self,
253 253 on_disk: &'on_disk [u8],
254 254 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
255 255 match self {
256 256 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
257 257 NodeRef::OnDisk(node) => node.full_path(on_disk),
258 258 }
259 259 }
260 260
261 261 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
262 262 /// HgPath>` detached from `'tree`
263 263 pub(super) fn full_path_borrowed(
264 264 &self,
265 265 on_disk: &'on_disk [u8],
266 266 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
267 267 match self {
268 268 NodeRef::InMemory(path, _node) => match path.full_path() {
269 269 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
270 270 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
271 271 },
272 272 NodeRef::OnDisk(node) => {
273 273 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
274 274 }
275 275 }
276 276 }
277 277
278 278 pub(super) fn base_name(
279 279 &self,
280 280 on_disk: &'on_disk [u8],
281 281 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
282 282 match self {
283 283 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
284 284 NodeRef::OnDisk(node) => node.base_name(on_disk),
285 285 }
286 286 }
287 287
288 288 pub(super) fn children(
289 289 &self,
290 290 on_disk: &'on_disk [u8],
291 291 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
292 292 match self {
293 293 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
294 294 NodeRef::OnDisk(node) => {
295 295 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
296 296 }
297 297 }
298 298 }
299 299
300 300 pub(super) fn has_copy_source(&self) -> bool {
301 301 match self {
302 302 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
303 303 NodeRef::OnDisk(node) => node.has_copy_source(),
304 304 }
305 305 }
306 306
307 307 pub(super) fn copy_source(
308 308 &self,
309 309 on_disk: &'on_disk [u8],
310 310 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
311 311 match self {
312 312 NodeRef::InMemory(_path, node) => {
313 313 Ok(node.copy_source.as_ref().map(|s| &**s))
314 314 }
315 315 NodeRef::OnDisk(node) => node.copy_source(on_disk),
316 316 }
317 317 }
318 318 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
319 319 /// HgPath>` detached from `'tree`
320 320 pub(super) fn copy_source_borrowed(
321 321 &self,
322 322 on_disk: &'on_disk [u8],
323 323 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
324 324 {
325 325 Ok(match self {
326 326 NodeRef::InMemory(_path, node) => {
327 327 node.copy_source.as_ref().map(|source| match source {
328 328 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
329 329 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
330 330 })
331 331 }
332 332 NodeRef::OnDisk(node) => node
333 333 .copy_source(on_disk)?
334 334 .map(|source| BorrowedPath::OnDisk(source)),
335 335 })
336 336 }
337 337
338 338 pub(super) fn entry(
339 339 &self,
340 340 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
341 341 match self {
342 342 NodeRef::InMemory(_path, node) => {
343 343 Ok(node.data.as_entry().copied())
344 344 }
345 345 NodeRef::OnDisk(node) => node.entry(),
346 346 }
347 347 }
348 348
349 349 pub(super) fn state(
350 350 &self,
351 351 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
352 352 Ok(self.entry()?.and_then(|e| {
353 353 if e.any_tracked() {
354 354 Some(e.state())
355 355 } else {
356 356 None
357 357 }
358 358 }))
359 359 }
360 360
361 361 pub(super) fn cached_directory_mtime(
362 362 &self,
363 363 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
364 364 match self {
365 365 NodeRef::InMemory(_path, node) => Ok(match node.data {
366 366 NodeData::CachedDirectory { mtime } => Some(mtime),
367 367 _ => None,
368 368 }),
369 369 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
370 370 }
371 371 }
372 372
373 373 pub(super) fn descendants_with_entry_count(&self) -> u32 {
374 374 match self {
375 375 NodeRef::InMemory(_path, node) => {
376 376 node.descendants_with_entry_count
377 377 }
378 378 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
379 379 }
380 380 }
381 381
382 382 pub(super) fn tracked_descendants_count(&self) -> u32 {
383 383 match self {
384 384 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
385 385 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
386 386 }
387 387 }
388 388 }
389 389
/// Represents a file or a directory
#[derive(Default, Debug)]
pub(super) struct Node<'on_disk> {
    /// What is stored for this node: a dirstate entry, a cached directory
    /// mtime, or nothing (see `NodeData`).
    pub(super) data: NodeData,

    /// Copy source recorded for this node, if any.
    pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,

    /// Nodes directly below this one in the tree.
    pub(super) children: ChildNodes<'on_disk>,

    /// How many (non-inclusive) descendants of this node have an entry.
    pub(super) descendants_with_entry_count: u32,

    /// How many (non-inclusive) descendants of this node have an entry whose
    /// state is "tracked".
    pub(super) tracked_descendants_count: u32,
}
406 406
/// Payload of a `Node`.
#[derive(Debug)]
pub(super) enum NodeData {
    /// The node carries a dirstate entry.
    Entry(DirstateEntry),
    /// The node carries no entry, only a cached directory mtime.
    CachedDirectory { mtime: TruncatedTimestamp },
    /// The node carries neither (this is the default).
    None,
}
413 413
414 414 impl Default for NodeData {
415 415 fn default() -> Self {
416 416 NodeData::None
417 417 }
418 418 }
419 419
420 420 impl NodeData {
421 421 fn has_entry(&self) -> bool {
422 422 match self {
423 423 NodeData::Entry(_) => true,
424 424 _ => false,
425 425 }
426 426 }
427 427
428 428 fn as_entry(&self) -> Option<&DirstateEntry> {
429 429 match self {
430 430 NodeData::Entry(entry) => Some(entry),
431 431 _ => None,
432 432 }
433 433 }
434 434
435 435 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
436 436 match self {
437 437 NodeData::Entry(entry) => Some(entry),
438 438 _ => None,
439 439 }
440 440 }
441 441 }
442 442
443 443 impl<'on_disk> DirstateMap<'on_disk> {
444 444 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
445 445 Self {
446 446 on_disk,
447 447 root: ChildNodes::default(),
448 448 nodes_with_entry_count: 0,
449 449 nodes_with_copy_source_count: 0,
450 450 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
451 451 unreachable_bytes: 0,
452 452 }
453 453 }
454 454
455 455 #[timed]
456 456 pub fn new_v2(
457 457 on_disk: &'on_disk [u8],
458 458 data_size: usize,
459 459 metadata: &[u8],
460 460 ) -> Result<Self, DirstateError> {
461 461 if let Some(data) = on_disk.get(..data_size) {
462 462 Ok(on_disk::read(data, metadata)?)
463 463 } else {
464 464 Err(DirstateV2ParseError.into())
465 465 }
466 466 }
467 467
468 468 #[timed]
469 469 pub fn new_v1(
470 470 on_disk: &'on_disk [u8],
471 471 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
472 472 let mut map = Self::empty(on_disk);
473 473 if map.on_disk.is_empty() {
474 474 return Ok((map, None));
475 475 }
476 476
477 477 let parents = parse_dirstate_entries(
478 478 map.on_disk,
479 479 |path, entry, copy_source| {
480 480 let tracked = entry.state().is_tracked();
481 481 let node = Self::get_or_insert_node(
482 482 map.on_disk,
483 483 &mut map.unreachable_bytes,
484 484 &mut map.root,
485 485 path,
486 486 WithBasename::to_cow_borrowed,
487 487 |ancestor| {
488 488 if tracked {
489 489 ancestor.tracked_descendants_count += 1
490 490 }
491 491 ancestor.descendants_with_entry_count += 1
492 492 },
493 493 )?;
494 494 assert!(
495 495 !node.data.has_entry(),
496 496 "duplicate dirstate entry in read"
497 497 );
498 498 assert!(
499 499 node.copy_source.is_none(),
500 500 "duplicate dirstate entry in read"
501 501 );
502 502 node.data = NodeData::Entry(*entry);
503 503 node.copy_source = copy_source.map(Cow::Borrowed);
504 504 map.nodes_with_entry_count += 1;
505 505 if copy_source.is_some() {
506 506 map.nodes_with_copy_source_count += 1
507 507 }
508 508 Ok(())
509 509 },
510 510 )?;
511 511 let parents = Some(parents.clone());
512 512
513 513 Ok((map, parents))
514 514 }
515 515
516 516 /// Assuming dirstate-v2 format, returns whether the next write should
517 517 /// append to the existing data file that contains `self.on_disk` (true),
518 518 /// or create a new data file from scratch (false).
519 519 pub(super) fn write_should_append(&self) -> bool {
520 520 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
521 521 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
522 522 }
523 523
524 524 fn get_node<'tree>(
525 525 &'tree self,
526 526 path: &HgPath,
527 527 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
528 528 let mut children = self.root.as_ref();
529 529 let mut components = path.components();
530 530 let mut component =
531 531 components.next().expect("expected at least one components");
532 532 loop {
533 533 if let Some(child) = children.get(component, self.on_disk)? {
534 534 if let Some(next_component) = components.next() {
535 535 component = next_component;
536 536 children = child.children(self.on_disk)?;
537 537 } else {
538 538 return Ok(Some(child));
539 539 }
540 540 } else {
541 541 return Ok(None);
542 542 }
543 543 }
544 544 }
545 545
546 546 /// Returns a mutable reference to the node at `path` if it exists
547 547 ///
548 548 /// This takes `root` instead of `&mut self` so that callers can mutate
549 549 /// other fields while the returned borrow is still valid
550 550 fn get_node_mut<'tree>(
551 551 on_disk: &'on_disk [u8],
552 552 unreachable_bytes: &mut u32,
553 553 root: &'tree mut ChildNodes<'on_disk>,
554 554 path: &HgPath,
555 555 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
556 556 let mut children = root;
557 557 let mut components = path.components();
558 558 let mut component =
559 559 components.next().expect("expected at least one components");
560 560 loop {
561 561 if let Some(child) = children
562 562 .make_mut(on_disk, unreachable_bytes)?
563 563 .get_mut(component)
564 564 {
565 565 if let Some(next_component) = components.next() {
566 566 component = next_component;
567 567 children = &mut child.children;
568 568 } else {
569 569 return Ok(Some(child));
570 570 }
571 571 } else {
572 572 return Ok(None);
573 573 }
574 574 }
575 575 }
576 576
577 577 pub(super) fn get_or_insert<'tree, 'path>(
578 578 &'tree mut self,
579 579 path: &HgPath,
580 580 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
581 581 Self::get_or_insert_node(
582 582 self.on_disk,
583 583 &mut self.unreachable_bytes,
584 584 &mut self.root,
585 585 path,
586 586 WithBasename::to_cow_owned,
587 587 |_| {},
588 588 )
589 589 }
590 590
591 591 fn get_or_insert_node<'tree, 'path>(
592 592 on_disk: &'on_disk [u8],
593 593 unreachable_bytes: &mut u32,
594 594 root: &'tree mut ChildNodes<'on_disk>,
595 595 path: &'path HgPath,
596 596 to_cow: impl Fn(
597 597 WithBasename<&'path HgPath>,
598 598 ) -> WithBasename<Cow<'on_disk, HgPath>>,
599 599 mut each_ancestor: impl FnMut(&mut Node),
600 600 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
601 601 let mut child_nodes = root;
602 602 let mut inclusive_ancestor_paths =
603 603 WithBasename::inclusive_ancestors_of(path);
604 604 let mut ancestor_path = inclusive_ancestor_paths
605 605 .next()
606 606 .expect("expected at least one inclusive ancestor");
607 607 loop {
608 608 let (_, child_node) = child_nodes
609 609 .make_mut(on_disk, unreachable_bytes)?
610 610 .raw_entry_mut()
611 611 .from_key(ancestor_path.base_name())
612 612 .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
613 613 if let Some(next) = inclusive_ancestor_paths.next() {
614 614 each_ancestor(child_node);
615 615 ancestor_path = next;
616 616 child_nodes = &mut child_node.children;
617 617 } else {
618 618 return Ok(child_node);
619 619 }
620 620 }
621 621 }
622 622
623 623 fn reset_state(
624 624 &mut self,
625 625 filename: &HgPath,
626 626 old_entry_opt: Option<DirstateEntry>,
627 627 wc_tracked: bool,
628 628 p1_tracked: bool,
629 629 p2_info: bool,
630 630 has_meaningful_mtime: bool,
631 631 parent_file_data_opt: Option<ParentFileData>,
632 632 ) -> Result<(), DirstateError> {
633 633 let (had_entry, was_tracked) = match old_entry_opt {
634 634 Some(old_entry) => (true, old_entry.tracked()),
635 635 None => (false, false),
636 636 };
637 637 let node = Self::get_or_insert_node(
638 638 self.on_disk,
639 639 &mut self.unreachable_bytes,
640 640 &mut self.root,
641 641 filename,
642 642 WithBasename::to_cow_owned,
643 643 |ancestor| {
644 644 if !had_entry {
645 645 ancestor.descendants_with_entry_count += 1;
646 646 }
647 647 if was_tracked {
648 648 if !wc_tracked {
649 649 ancestor.tracked_descendants_count = ancestor
650 650 .tracked_descendants_count
651 651 .checked_sub(1)
652 652 .expect("tracked count to be >= 0");
653 653 }
654 654 } else {
655 655 if wc_tracked {
656 656 ancestor.tracked_descendants_count += 1;
657 657 }
658 658 }
659 659 },
660 660 )?;
661 661
662 662 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
663 663 DirstateV2Data {
664 664 wc_tracked,
665 665 p1_tracked,
666 666 p2_info,
667 667 mode_size: parent_file_data.mode_size,
668 668 mtime: if has_meaningful_mtime {
669 669 parent_file_data.mtime
670 670 } else {
671 671 None
672 672 },
673 673 ..Default::default()
674 674 }
675 675 } else {
676 676 DirstateV2Data {
677 677 wc_tracked,
678 678 p1_tracked,
679 679 p2_info,
680 680 ..Default::default()
681 681 }
682 682 };
683 683 if !had_entry {
684 684 self.nodes_with_entry_count += 1;
685 685 }
686 686 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
687 687 Ok(())
688 688 }
689 689
690 690 fn set_tracked(
691 691 &mut self,
692 692 filename: &HgPath,
693 693 old_entry_opt: Option<DirstateEntry>,
694 694 ) -> Result<bool, DirstateV2ParseError> {
695 695 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
696 696 let had_entry = old_entry_opt.is_some();
697 697 let tracked_count_increment = if was_tracked { 0 } else { 1 };
698 698 let mut new = false;
699 699
700 700 let node = Self::get_or_insert_node(
701 701 self.on_disk,
702 702 &mut self.unreachable_bytes,
703 703 &mut self.root,
704 704 filename,
705 705 WithBasename::to_cow_owned,
706 706 |ancestor| {
707 707 if !had_entry {
708 708 ancestor.descendants_with_entry_count += 1;
709 709 }
710 710
711 711 ancestor.tracked_descendants_count += tracked_count_increment;
712 712 },
713 713 )?;
714 714 let new_entry = if let Some(old_entry) = old_entry_opt {
715 715 let mut e = old_entry.clone();
716 716 if e.tracked() {
717 717 // XXX
718 718 // This is probably overkill for more case, but we need this to
719 719 // fully replace the `normallookup` call with `set_tracked`
720 720 // one. Consider smoothing this in the future.
721 721 e.set_possibly_dirty();
722 722 } else {
723 723 new = true;
724 724 e.set_tracked();
725 725 }
726 726 e
727 727 } else {
728 728 self.nodes_with_entry_count += 1;
729 729 new = true;
730 730 DirstateEntry::new_tracked()
731 731 };
732 732 node.data = NodeData::Entry(new_entry);
733 733 Ok(new)
734 734 }
735 735
736 736 /// It is the responsibility of the caller to know that there was an entry
737 737 /// there before. Does not handle the removal of copy source
738 738 fn set_untracked(
739 739 &mut self,
740 740 filename: &HgPath,
741 741 old_entry: DirstateEntry,
742 742 ) -> Result<(), DirstateV2ParseError> {
743 743 let node = Self::get_or_insert_node(
744 744 self.on_disk,
745 745 &mut self.unreachable_bytes,
746 746 &mut self.root,
747 747 filename,
748 748 WithBasename::to_cow_owned,
749 749 |ancestor| {
750 750 ancestor.tracked_descendants_count = ancestor
751 751 .tracked_descendants_count
752 752 .checked_sub(1)
753 753 .expect("tracked_descendants_count should be >= 0");
754 754 },
755 755 )?;
756 756 let mut new_entry = old_entry.clone();
757 757 new_entry.set_untracked();
758 758 node.data = NodeData::Entry(new_entry);
759 759 Ok(())
760 760 }
761 761
762 762 fn set_clean(
763 763 &mut self,
764 764 filename: &HgPath,
765 765 old_entry: DirstateEntry,
766 766 mode: u32,
767 767 size: u32,
768 768 mtime: TruncatedTimestamp,
769 769 ) -> Result<(), DirstateError> {
770 770 let node = Self::get_or_insert_node(
771 771 self.on_disk,
772 772 &mut self.unreachable_bytes,
773 773 &mut self.root,
774 774 filename,
775 775 WithBasename::to_cow_owned,
776 776 |ancestor| {
777 777 if !old_entry.tracked() {
778 778 ancestor.tracked_descendants_count += 1;
779 779 }
780 780 },
781 781 )?;
782 782 let mut new_entry = old_entry.clone();
783 783 new_entry.set_clean(mode, size, mtime);
784 784 node.data = NodeData::Entry(new_entry);
785 785 Ok(())
786 786 }
787 787
788 788 fn set_possibly_dirty(
789 789 &mut self,
790 790 filename: &HgPath,
791 791 ) -> Result<(), DirstateError> {
792 792 let node = Self::get_or_insert_node(
793 793 self.on_disk,
794 794 &mut self.unreachable_bytes,
795 795 &mut self.root,
796 796 filename,
797 797 WithBasename::to_cow_owned,
798 798 |_ancestor| {},
799 799 )?;
800 800 let entry = node.data.as_entry_mut().expect("entry should exist");
801 801 entry.set_possibly_dirty();
802 802 node.data = NodeData::Entry(*entry);
803 803 Ok(())
804 804 }
805 805
806 /// Clears the cached mtime for the (potential) folder at `path`.
807 pub(super) fn clear_cached_mtime(
808 &mut self,
809 path: &HgPath,
810 ) -> Result<(), DirstateV2ParseError> {
811 let node = match DirstateMap::get_node_mut(
812 self.on_disk,
813 &mut self.unreachable_bytes,
814 &mut self.root,
815 path,
816 )? {
817 Some(node) => node,
818 None => return Ok(()),
819 };
820 if let NodeData::CachedDirectory { .. } = &node.data {
821 node.data = NodeData::None
822 }
823 Ok(())
824 }
825
806 826 fn iter_nodes<'tree>(
807 827 &'tree self,
808 828 ) -> impl Iterator<
809 829 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
810 830 > + 'tree {
811 831 // Depth first tree traversal.
812 832 //
813 833 // If we could afford internal iteration and recursion,
814 834 // this would look like:
815 835 //
816 836 // ```
817 837 // fn traverse_children(
818 838 // children: &ChildNodes,
819 839 // each: &mut impl FnMut(&Node),
820 840 // ) {
821 841 // for child in children.values() {
822 842 // traverse_children(&child.children, each);
823 843 // each(child);
824 844 // }
825 845 // }
826 846 // ```
827 847 //
828 848 // However we want an external iterator and therefore can’t use the
829 849 // call stack. Use an explicit stack instead:
830 850 let mut stack = Vec::new();
831 851 let mut iter = self.root.as_ref().iter();
832 852 std::iter::from_fn(move || {
833 853 while let Some(child_node) = iter.next() {
834 854 let children = match child_node.children(self.on_disk) {
835 855 Ok(children) => children,
836 856 Err(error) => return Some(Err(error)),
837 857 };
838 858 // Pseudo-recursion
839 859 let new_iter = children.iter();
840 860 let old_iter = std::mem::replace(&mut iter, new_iter);
841 861 stack.push((child_node, old_iter));
842 862 }
843 863 // Found the end of a `children.iter()` iterator.
844 864 if let Some((child_node, next_iter)) = stack.pop() {
845 865 // "Return" from pseudo-recursion by restoring state from the
846 866 // explicit stack
847 867 iter = next_iter;
848 868
849 869 Some(Ok(child_node))
850 870 } else {
851 871 // Reached the bottom of the stack, we’re done
852 872 None
853 873 }
854 874 })
855 875 }
856 876
857 877 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
858 878 if let Cow::Borrowed(path) = path {
859 879 *unreachable_bytes += path.len() as u32
860 880 }
861 881 }
862 882 }
863 883
/// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
///
/// The callback is only called for incoming `Ok` values. Errors are passed
/// through as-is. In order to let it use the `?` operator the callback is
/// expected to return a `Result` of `Option`, instead of an `Option` of
/// `Result`.
fn filter_map_results<'a, I, F, A, B, E>(
    iter: I,
    f: F,
) -> impl Iterator<Item = Result<B, E>> + 'a
where
    I: Iterator<Item = Result<A, E>> + 'a,
    F: Fn(A) -> Result<Option<B>, E> + 'a,
{
    iter.filter_map(move |item| {
        let value = match item {
            Ok(value) => value,
            Err(error) => return Some(Err(error)),
        };
        // `Ok(None)` is filtered out, everything else is yielded
        f(value).transpose()
    })
}
883 903
884 904 impl OwningDirstateMap {
885 905 pub fn clear(&mut self) {
886 906 self.with_dmap_mut(|map| {
887 907 map.root = Default::default();
888 908 map.nodes_with_entry_count = 0;
889 909 map.nodes_with_copy_source_count = 0;
890 910 });
891 911 }
892 912
893 913 pub fn set_tracked(
894 914 &mut self,
895 915 filename: &HgPath,
896 916 ) -> Result<bool, DirstateV2ParseError> {
897 917 let old_entry_opt = self.get(filename)?;
898 918 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
899 919 }
900 920
901 921 pub fn set_untracked(
902 922 &mut self,
903 923 filename: &HgPath,
904 924 ) -> Result<bool, DirstateError> {
905 925 let old_entry_opt = self.get(filename)?;
906 926 match old_entry_opt {
907 927 None => Ok(false),
908 928 Some(old_entry) => {
909 929 if !old_entry.tracked() {
910 930 // `DirstateMap::set_untracked` is not a noop if
911 931 // already not tracked as it will decrement the
912 932 // tracked counters while going down.
913 933 return Ok(true);
914 934 }
915 935 if old_entry.added() {
916 936 // Untracking an "added" entry will just result in a
917 937 // worthless entry (and other parts of the code will
918 938 // complain about it), just drop it entirely.
919 939 self.drop_entry_and_copy_source(filename)?;
920 940 return Ok(true);
921 941 }
922 942 if !old_entry.p2_info() {
923 943 self.copy_map_remove(filename)?;
924 944 }
925 945
926 946 self.with_dmap_mut(|map| {
927 947 map.set_untracked(filename, old_entry)?;
928 948 Ok(true)
929 949 })
930 950 }
931 951 }
932 952 }
933 953
934 954 pub fn set_clean(
935 955 &mut self,
936 956 filename: &HgPath,
937 957 mode: u32,
938 958 size: u32,
939 959 mtime: TruncatedTimestamp,
940 960 ) -> Result<(), DirstateError> {
941 961 let old_entry = match self.get(filename)? {
942 962 None => {
943 963 return Err(
944 964 DirstateMapError::PathNotFound(filename.into()).into()
945 965 )
946 966 }
947 967 Some(e) => e,
948 968 };
949 969 self.copy_map_remove(filename)?;
950 970 self.with_dmap_mut(|map| {
951 971 map.set_clean(filename, old_entry, mode, size, mtime)
952 972 })
953 973 }
954 974
955 975 pub fn set_possibly_dirty(
956 976 &mut self,
957 977 filename: &HgPath,
958 978 ) -> Result<(), DirstateError> {
959 979 if self.get(filename)?.is_none() {
960 980 return Err(DirstateMapError::PathNotFound(filename.into()).into());
961 981 }
962 982 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
963 983 }
964 984
965 985 pub fn reset_state(
966 986 &mut self,
967 987 filename: &HgPath,
968 988 wc_tracked: bool,
969 989 p1_tracked: bool,
970 990 p2_info: bool,
971 991 has_meaningful_mtime: bool,
972 992 parent_file_data_opt: Option<ParentFileData>,
973 993 ) -> Result<(), DirstateError> {
974 994 if !(p1_tracked || p2_info || wc_tracked) {
975 995 self.drop_entry_and_copy_source(filename)?;
976 996 return Ok(());
977 997 }
978 998 self.copy_map_remove(filename)?;
979 999 let old_entry_opt = self.get(filename)?;
980 1000 self.with_dmap_mut(|map| {
981 1001 map.reset_state(
982 1002 filename,
983 1003 old_entry_opt,
984 1004 wc_tracked,
985 1005 p1_tracked,
986 1006 p2_info,
987 1007 has_meaningful_mtime,
988 1008 parent_file_data_opt,
989 1009 )
990 1010 })
991 1011 }
992 1012
993 1013 pub fn drop_entry_and_copy_source(
994 1014 &mut self,
995 1015 filename: &HgPath,
996 1016 ) -> Result<(), DirstateError> {
997 1017 let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
998 1018 struct Dropped {
999 1019 was_tracked: bool,
1000 1020 had_entry: bool,
1001 1021 had_copy_source: bool,
1002 1022 }
1003 1023
1004 1024 /// If this returns `Ok(Some((dropped, removed)))`, then
1005 1025 ///
1006 1026 /// * `dropped` is about the leaf node that was at `filename`
1007 1027 /// * `removed` is whether this particular level of recursion just
1008 1028 /// removed a node in `nodes`.
1009 1029 fn recur<'on_disk>(
1010 1030 on_disk: &'on_disk [u8],
1011 1031 unreachable_bytes: &mut u32,
1012 1032 nodes: &mut ChildNodes<'on_disk>,
1013 1033 path: &HgPath,
1014 1034 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
1015 1035 let (first_path_component, rest_of_path) =
1016 1036 path.split_first_component();
1017 1037 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
1018 1038 let node = if let Some(node) = nodes.get_mut(first_path_component)
1019 1039 {
1020 1040 node
1021 1041 } else {
1022 1042 return Ok(None);
1023 1043 };
1024 1044 let dropped;
1025 1045 if let Some(rest) = rest_of_path {
1026 1046 if let Some((d, removed)) = recur(
1027 1047 on_disk,
1028 1048 unreachable_bytes,
1029 1049 &mut node.children,
1030 1050 rest,
1031 1051 )? {
1032 1052 dropped = d;
1033 1053 if dropped.had_entry {
1034 1054 node.descendants_with_entry_count = node
1035 1055 .descendants_with_entry_count
1036 1056 .checked_sub(1)
1037 1057 .expect(
1038 1058 "descendants_with_entry_count should be >= 0",
1039 1059 );
1040 1060 }
1041 1061 if dropped.was_tracked {
1042 1062 node.tracked_descendants_count = node
1043 1063 .tracked_descendants_count
1044 1064 .checked_sub(1)
1045 1065 .expect(
1046 1066 "tracked_descendants_count should be >= 0",
1047 1067 );
1048 1068 }
1049 1069
1050 1070 // Directory caches must be invalidated when removing a
1051 1071 // child node
1052 1072 if removed {
1053 1073 if let NodeData::CachedDirectory { .. } = &node.data {
1054 1074 node.data = NodeData::None
1055 1075 }
1056 1076 }
1057 1077 } else {
1058 1078 return Ok(None);
1059 1079 }
1060 1080 } else {
1061 1081 let entry = node.data.as_entry();
1062 1082 let was_tracked = entry.map_or(false, |entry| entry.tracked());
1063 1083 let had_entry = entry.is_some();
1064 1084 if had_entry {
1065 1085 node.data = NodeData::None
1066 1086 }
1067 1087 let mut had_copy_source = false;
1068 1088 if let Some(source) = &node.copy_source {
1069 1089 DirstateMap::count_dropped_path(unreachable_bytes, source);
1070 1090 had_copy_source = true;
1071 1091 node.copy_source = None
1072 1092 }
1073 1093 dropped = Dropped {
1074 1094 was_tracked,
1075 1095 had_entry,
1076 1096 had_copy_source,
1077 1097 };
1078 1098 }
1079 1099 // After recursion, for both leaf (rest_of_path is None) nodes and
1080 1100 // parent nodes, remove a node if it just became empty.
1081 1101 let remove = !node.data.has_entry()
1082 1102 && node.copy_source.is_none()
1083 1103 && node.children.is_empty();
1084 1104 if remove {
1085 1105 let (key, _) =
1086 1106 nodes.remove_entry(first_path_component).unwrap();
1087 1107 DirstateMap::count_dropped_path(
1088 1108 unreachable_bytes,
1089 1109 key.full_path(),
1090 1110 )
1091 1111 }
1092 1112 Ok(Some((dropped, remove)))
1093 1113 }
1094 1114
1095 1115 self.with_dmap_mut(|map| {
1096 1116 if let Some((dropped, _removed)) = recur(
1097 1117 map.on_disk,
1098 1118 &mut map.unreachable_bytes,
1099 1119 &mut map.root,
1100 1120 filename,
1101 1121 )? {
1102 1122 if dropped.had_entry {
1103 1123 map.nodes_with_entry_count = map
1104 1124 .nodes_with_entry_count
1105 1125 .checked_sub(1)
1106 1126 .expect("nodes_with_entry_count should be >= 0");
1107 1127 }
1108 1128 if dropped.had_copy_source {
1109 1129 map.nodes_with_copy_source_count = map
1110 1130 .nodes_with_copy_source_count
1111 1131 .checked_sub(1)
1112 1132 .expect("nodes_with_copy_source_count should be >= 0");
1113 1133 }
1114 1134 } else {
1115 1135 debug_assert!(!was_tracked);
1116 1136 }
1117 1137 Ok(())
1118 1138 })
1119 1139 }
1120 1140
1121 1141 pub fn has_tracked_dir(
1122 1142 &mut self,
1123 1143 directory: &HgPath,
1124 1144 ) -> Result<bool, DirstateError> {
1125 1145 self.with_dmap_mut(|map| {
1126 1146 if let Some(node) = map.get_node(directory)? {
1127 1147 // A node without a `DirstateEntry` was created to hold child
1128 1148 // nodes, and is therefore a directory.
1129 1149 let state = node.state()?;
1130 1150 Ok(state.is_none() && node.tracked_descendants_count() > 0)
1131 1151 } else {
1132 1152 Ok(false)
1133 1153 }
1134 1154 })
1135 1155 }
1136 1156
1137 1157 pub fn has_dir(
1138 1158 &mut self,
1139 1159 directory: &HgPath,
1140 1160 ) -> Result<bool, DirstateError> {
1141 1161 self.with_dmap_mut(|map| {
1142 1162 if let Some(node) = map.get_node(directory)? {
1143 1163 // A node without a `DirstateEntry` was created to hold child
1144 1164 // nodes, and is therefore a directory.
1145 1165 let state = node.state()?;
1146 1166 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
1147 1167 } else {
1148 1168 Ok(false)
1149 1169 }
1150 1170 })
1151 1171 }
1152 1172
1153 1173 #[timed]
1154 1174 pub fn pack_v1(
1155 1175 &self,
1156 1176 parents: DirstateParents,
1157 1177 ) -> Result<Vec<u8>, DirstateError> {
1158 1178 let map = self.get_map();
1159 1179 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1160 1180 // reallocations
1161 1181 let mut size = parents.as_bytes().len();
1162 1182 for node in map.iter_nodes() {
1163 1183 let node = node?;
1164 1184 if node.entry()?.is_some() {
1165 1185 size += packed_entry_size(
1166 1186 node.full_path(map.on_disk)?,
1167 1187 node.copy_source(map.on_disk)?,
1168 1188 );
1169 1189 }
1170 1190 }
1171 1191
1172 1192 let mut packed = Vec::with_capacity(size);
1173 1193 packed.extend(parents.as_bytes());
1174 1194
1175 1195 for node in map.iter_nodes() {
1176 1196 let node = node?;
1177 1197 if let Some(entry) = node.entry()? {
1178 1198 pack_entry(
1179 1199 node.full_path(map.on_disk)?,
1180 1200 &entry,
1181 1201 node.copy_source(map.on_disk)?,
1182 1202 &mut packed,
1183 1203 );
1184 1204 }
1185 1205 }
1186 1206 Ok(packed)
1187 1207 }
1188 1208
1189 1209 /// Returns new data and metadata together with whether that data should be
1190 1210 /// appended to the existing data file whose content is at
1191 1211 /// `map.on_disk` (true), instead of written to a new data file
1192 1212 /// (false).
1193 1213 #[timed]
1194 1214 pub fn pack_v2(
1195 1215 &self,
1196 1216 can_append: bool,
1197 1217 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> {
1198 1218 let map = self.get_map();
1199 1219 on_disk::write(map, can_append)
1200 1220 }
1201 1221
1202 1222 /// `callback` allows the caller to process and do something with the
1203 1223 /// results of the status. This is needed to do so efficiently (i.e.
1204 1224 /// without cloning the `DirstateStatus` object with its paths) because
1205 1225 /// we need to borrow from `Self`.
1206 1226 pub fn with_status<R>(
1207 1227 &mut self,
1208 1228 matcher: &(dyn Matcher + Sync),
1209 1229 root_dir: PathBuf,
1210 1230 ignore_files: Vec<PathBuf>,
1211 1231 options: StatusOptions,
1212 1232 callback: impl for<'r> FnOnce(
1213 1233 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1214 1234 ) -> R,
1215 1235 ) -> R {
1216 1236 self.with_dmap_mut(|map| {
1217 1237 callback(super::status::status(
1218 1238 map,
1219 1239 matcher,
1220 1240 root_dir,
1221 1241 ignore_files,
1222 1242 options,
1223 1243 ))
1224 1244 })
1225 1245 }
1226 1246
1227 1247 pub fn copy_map_len(&self) -> usize {
1228 1248 let map = self.get_map();
1229 1249 map.nodes_with_copy_source_count as usize
1230 1250 }
1231 1251
1232 1252 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1233 1253 let map = self.get_map();
1234 1254 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1235 1255 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1236 1256 Some((node.full_path(map.on_disk)?, source))
1237 1257 } else {
1238 1258 None
1239 1259 })
1240 1260 }))
1241 1261 }
1242 1262
1243 1263 pub fn copy_map_contains_key(
1244 1264 &self,
1245 1265 key: &HgPath,
1246 1266 ) -> Result<bool, DirstateV2ParseError> {
1247 1267 let map = self.get_map();
1248 1268 Ok(if let Some(node) = map.get_node(key)? {
1249 1269 node.has_copy_source()
1250 1270 } else {
1251 1271 false
1252 1272 })
1253 1273 }
1254 1274
1255 1275 pub fn copy_map_get(
1256 1276 &self,
1257 1277 key: &HgPath,
1258 1278 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1259 1279 let map = self.get_map();
1260 1280 if let Some(node) = map.get_node(key)? {
1261 1281 if let Some(source) = node.copy_source(map.on_disk)? {
1262 1282 return Ok(Some(source));
1263 1283 }
1264 1284 }
1265 1285 Ok(None)
1266 1286 }
1267 1287
1268 1288 pub fn copy_map_remove(
1269 1289 &mut self,
1270 1290 key: &HgPath,
1271 1291 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1272 1292 self.with_dmap_mut(|map| {
1273 1293 let count = &mut map.nodes_with_copy_source_count;
1274 1294 let unreachable_bytes = &mut map.unreachable_bytes;
1275 1295 Ok(DirstateMap::get_node_mut(
1276 1296 map.on_disk,
1277 1297 unreachable_bytes,
1278 1298 &mut map.root,
1279 1299 key,
1280 1300 )?
1281 1301 .and_then(|node| {
1282 1302 if let Some(source) = &node.copy_source {
1283 1303 *count -= 1;
1284 1304 DirstateMap::count_dropped_path(unreachable_bytes, source);
1285 1305 }
1286 1306 node.copy_source.take().map(Cow::into_owned)
1287 1307 }))
1288 1308 })
1289 1309 }
1290 1310
1291 1311 pub fn copy_map_insert(
1292 1312 &mut self,
1293 1313 key: &HgPath,
1294 1314 value: &HgPath,
1295 1315 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1296 1316 self.with_dmap_mut(|map| {
1297 1317 let node = DirstateMap::get_or_insert_node(
1298 1318 map.on_disk,
1299 1319 &mut map.unreachable_bytes,
1300 1320 &mut map.root,
1301 1321 &key,
1302 1322 WithBasename::to_cow_owned,
1303 1323 |_ancestor| {},
1304 1324 )?;
1305 1325 if node.copy_source.is_none() {
1306 1326 map.nodes_with_copy_source_count += 1
1307 1327 }
1308 1328 Ok(node
1309 1329 .copy_source
1310 1330 .replace(value.to_owned().into())
1311 1331 .map(Cow::into_owned))
1312 1332 })
1313 1333 }
1314 1334
1315 1335 pub fn len(&self) -> usize {
1316 1336 let map = self.get_map();
1317 1337 map.nodes_with_entry_count as usize
1318 1338 }
1319 1339
1320 1340 pub fn contains_key(
1321 1341 &self,
1322 1342 key: &HgPath,
1323 1343 ) -> Result<bool, DirstateV2ParseError> {
1324 1344 Ok(self.get(key)?.is_some())
1325 1345 }
1326 1346
1327 1347 pub fn get(
1328 1348 &self,
1329 1349 key: &HgPath,
1330 1350 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1331 1351 let map = self.get_map();
1332 1352 Ok(if let Some(node) = map.get_node(key)? {
1333 1353 node.entry()?
1334 1354 } else {
1335 1355 None
1336 1356 })
1337 1357 }
1338 1358
1339 1359 pub fn iter(&self) -> StateMapIter<'_> {
1340 1360 let map = self.get_map();
1341 1361 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1342 1362 Ok(if let Some(entry) = node.entry()? {
1343 1363 Some((node.full_path(map.on_disk)?, entry))
1344 1364 } else {
1345 1365 None
1346 1366 })
1347 1367 }))
1348 1368 }
1349 1369
1350 1370 pub fn iter_tracked_dirs(
1351 1371 &mut self,
1352 1372 ) -> Result<
1353 1373 Box<
1354 1374 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1355 1375 + Send
1356 1376 + '_,
1357 1377 >,
1358 1378 DirstateError,
1359 1379 > {
1360 1380 let map = self.get_map();
1361 1381 let on_disk = map.on_disk;
1362 1382 Ok(Box::new(filter_map_results(
1363 1383 map.iter_nodes(),
1364 1384 move |node| {
1365 1385 Ok(if node.tracked_descendants_count() > 0 {
1366 1386 Some(node.full_path(on_disk)?)
1367 1387 } else {
1368 1388 None
1369 1389 })
1370 1390 },
1371 1391 )))
1372 1392 }
1373 1393
1374 1394 /// Only public because it needs to be exposed to the Python layer.
1375 1395 /// It is not the full `setparents` logic, only the parts that mutate the
1376 1396 /// entries.
1377 1397 pub fn setparents_fixup(
1378 1398 &mut self,
1379 1399 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1380 1400 // XXX
1381 1401 // All the copying and re-querying is quite inefficient, but this is
1382 1402 // still a lot better than doing it from Python.
1383 1403 //
1384 1404 // The better solution is to develop a mechanism for `iter_mut`,
1385 1405 // which will be a lot more involved: we're dealing with a lazy,
1386 1406 // append-mostly, tree-like data structure. This will do for now.
1387 1407 let mut copies = vec![];
1388 1408 let mut files_with_p2_info = vec![];
1389 1409 for res in self.iter() {
1390 1410 let (path, entry) = res?;
1391 1411 if entry.p2_info() {
1392 1412 files_with_p2_info.push(path.to_owned())
1393 1413 }
1394 1414 }
1395 1415 self.with_dmap_mut(|map| {
1396 1416 for path in files_with_p2_info.iter() {
1397 1417 let node = map.get_or_insert(path)?;
1398 1418 let entry =
1399 1419 node.data.as_entry_mut().expect("entry should exist");
1400 1420 entry.drop_merge_data();
1401 1421 if let Some(source) = node.copy_source.take().as_deref() {
1402 1422 copies.push((path.to_owned(), source.to_owned()));
1403 1423 }
1404 1424 }
1405 1425 Ok(copies)
1406 1426 })
1407 1427 }
1408 1428
1409 1429 pub fn debug_iter(
1410 1430 &self,
1411 1431 all: bool,
1412 1432 ) -> Box<
1413 1433 dyn Iterator<
1414 1434 Item = Result<
1415 1435 (&HgPath, (u8, i32, i32, i32)),
1416 1436 DirstateV2ParseError,
1417 1437 >,
1418 1438 > + Send
1419 1439 + '_,
1420 1440 > {
1421 1441 let map = self.get_map();
1422 1442 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1423 1443 let debug_tuple = if let Some(entry) = node.entry()? {
1424 1444 entry.debug_tuple()
1425 1445 } else if !all {
1426 1446 return Ok(None);
1427 1447 } else if let Some(mtime) = node.cached_directory_mtime()? {
1428 1448 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1429 1449 } else {
1430 1450 (b' ', 0, -1, -1)
1431 1451 };
1432 1452 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1433 1453 }))
1434 1454 }
1435 1455 }
1436 1456 #[cfg(test)]
1437 1457 mod tests {
1438 1458 use super::*;
1439 1459
1440 1460 /// Shortcut to return tracked descendants of a path.
1441 1461 /// Panics if the path does not exist.
1442 1462 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1443 1463 let path = dbg!(HgPath::new(path));
1444 1464 let node = map.get_map().get_node(path);
1445 1465 node.unwrap().unwrap().tracked_descendants_count()
1446 1466 }
1447 1467
1448 1468 /// Shortcut to return descendants with an entry.
1449 1469 /// Panics if the path does not exist.
1450 1470 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1451 1471 let path = dbg!(HgPath::new(path));
1452 1472 let node = map.get_map().get_node(path);
1453 1473 node.unwrap().unwrap().descendants_with_entry_count()
1454 1474 }
1455 1475
1456 1476 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1457 1477 let path = dbg!(HgPath::new(path));
1458 1478 let node = map.get_map().get_node(path);
1459 1479 assert!(node.unwrap().is_none());
1460 1480 }
1461 1481
/// Shortcut for path creation in tests: wraps the bytes `b` with
/// `HgPath::new`.
fn p(b: &[u8]) -> &HgPath {
    HgPath::new(b)
}
1466 1486
/// Test the very simple case of a single tracked file: tracking it bumps
/// the counters of its ancestors, untracking it empties the map again.
#[test]
fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
    let file = p(b"some/nested/path");
    let mut map = OwningDirstateMap::new_empty(vec![]);
    assert_eq!(map.len(), 0);

    map.set_tracked(file)?;

    assert_eq!(map.len(), 1);
    assert_eq!(tracked_descendants(&map, b"some"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);

    map.set_untracked(file)?;
    assert_eq!(map.len(), 0);
    let top_level = map.get_map().get_node(p(b"some"))?;
    assert!(top_level.is_none());

    Ok(())
}
1486 1506
/// Test the simple case of all tracked, but multiple files
///
/// Files are untracked one at a time, checking after each step that the
/// counters of the remaining ancestors are updated and that nodes left
/// empty are removed.
#[test]
fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
    let mut map = OwningDirstateMap::new_empty(vec![]);

    map.set_tracked(p(b"some/nested/path"))?;
    map.set_tracked(p(b"some/nested/file"))?;
    // one layer without any files to test deletion cascade
    map.set_tracked(p(b"some/other/nested/path"))?;
    map.set_tracked(p(b"root_file"))?;
    map.set_tracked(p(b"some/file"))?;
    map.set_tracked(p(b"some/file2"))?;
    map.set_tracked(p(b"some/file3"))?;

    assert_eq!(map.len(), 7);
    assert_eq!(tracked_descendants(&map, b"some"), 6);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);

    map.set_untracked(p(b"some/nested/path"))?;
    assert_eq!(map.len(), 6);
    assert_eq!(tracked_descendants(&map, b"some"), 5);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);

    map.set_untracked(p(b"some/nested/file"))?;
    assert_eq!(map.len(), 5);
    assert_eq!(tracked_descendants(&map, b"some"), 4);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
    // Both files of `some/nested` are gone, so the directory node itself
    // must have been removed.
    assert_does_not_exist(&map, b"some/nested");

    map.set_untracked(p(b"some/other/nested/path"))?;
    assert_eq!(map.len(), 4);
    assert_eq!(tracked_descendants(&map, b"some"), 3);
    assert_does_not_exist(&map, b"some/other");

    map.set_untracked(p(b"root_file"))?;
    assert_eq!(map.len(), 3);
    assert_eq!(tracked_descendants(&map, b"some"), 3);
    assert_does_not_exist(&map, b"root_file");

    map.set_untracked(p(b"some/file"))?;
    assert_eq!(map.len(), 2);
    assert_eq!(tracked_descendants(&map, b"some"), 2);
    assert_does_not_exist(&map, b"some/file");

    map.set_untracked(p(b"some/file2"))?;
    assert_eq!(map.len(), 1);
    assert_eq!(tracked_descendants(&map, b"some"), 1);
    assert_does_not_exist(&map, b"some/file2");

    map.set_untracked(p(b"some/file3"))?;
    assert_eq!(map.len(), 0);
    assert_does_not_exist(&map, b"some/file3");

    Ok(())
}
1548 1568
/// Check with a mix of tracked and non-tracked items
///
/// The map is first populated through `set_tracked` and `reset_state` with
/// entries in different states, then files are untracked one by one while
/// checking `len()`, the tracked descendants counters and the
/// descendants-with-entry counters.
#[test]
fn test_tracked_descendants_different() -> Result<(), DirstateError> {
    let mut map = OwningDirstateMap::new_empty(vec![]);

    // A file that was just added
    map.set_tracked(p(b"some/nested/path"))?;
    // This has no information, the dirstate should ignore it
    map.reset_state(p(b"some/file"), false, false, false, false, None)?;
    assert_does_not_exist(&map, b"some/file");

    // A file that was removed
    map.reset_state(
        p(b"some/nested/file"),
        false,
        true,
        false,
        false,
        None,
    )?;
    assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked());
    // Only present in p2
    map.reset_state(p(b"some/file3"), false, false, true, false, None)?;
    assert!(!map.get(p(b"some/file3"))?.unwrap().tracked());
    // A file that was merged
    map.reset_state(p(b"root_file"), true, true, true, false, None)?;
    assert!(map.get(p(b"root_file"))?.unwrap().tracked());
    // A file that is added, with info from p2
    // XXX is that actually possible?
    map.reset_state(p(b"some/file2"), true, false, true, false, None)?;
    assert!(map.get(p(b"some/file2"))?.unwrap().tracked());
    // A clean file
    // One layer without any files to test deletion cascade
    map.reset_state(
        p(b"some/other/nested/path"),
        true,
        true,
        false,
        false,
        None,
    )?;
    assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked());

    // Six entries in total: `some/file` was never stored
    assert_eq!(map.len(), 6);
    assert_eq!(tracked_descendants(&map, b"some"), 3);
    assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
    assert_eq!(
        descendants_with_an_entry(&map, b"some/other/nested/path"),
        0
    );
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);

    // might as well check this
    map.set_untracked(p(b"path/does/not/exist"))?;
    assert_eq!(map.len(), 6);

    map.set_untracked(p(b"some/other/nested/path"))?;
    // It is set untracked but not deleted since it held other information
    assert_eq!(map.len(), 6);
    assert_eq!(tracked_descendants(&map, b"some"), 2);
    assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
    assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);

    map.set_untracked(p(b"some/nested/path"))?;
    // It is set untracked *and* deleted since it was only added
    assert_eq!(map.len(), 5);
    assert_eq!(tracked_descendants(&map, b"some"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
    assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
    assert_does_not_exist(&map, b"some/nested/path");

    map.set_untracked(p(b"root_file"))?;
    // Untracked but not deleted
    assert_eq!(map.len(), 5);
    assert!(map.get(p(b"root_file"))?.is_some());

    // Untracked but still present in the map
    map.set_untracked(p(b"some/file2"))?;
    assert_eq!(map.len(), 5);
    assert_eq!(tracked_descendants(&map, b"some"), 0);
    assert!(map.get(p(b"some/file2"))?.is_some());

    // Was already not tracked, nothing changes
    map.set_untracked(p(b"some/file3"))?;
    assert_eq!(map.len(), 5);
    assert_eq!(tracked_descendants(&map, b"some"), 0);
    assert!(map.get(p(b"some/file3"))?.is_some());

    Ok(())
}
1645 1665
/// Check that copies counter is correctly updated
///
/// Copy sources are inserted, overridden and then indirectly removed via
/// `set_untracked`, with `copy_map_len()` checked after each step.
#[test]
fn test_copy_source() -> Result<(), DirstateError> {
    let mut map = OwningDirstateMap::new_empty(vec![]);

    // Clean file
    map.reset_state(p(b"files/clean"), true, true, false, false, None)?;
    // Merged file
    map.reset_state(p(b"files/from_p2"), true, true, true, false, None)?;
    // Removed file
    map.reset_state(p(b"removed"), false, true, false, false, None)?;
    // Added file
    map.reset_state(p(b"files/added"), true, false, false, false, None)?;
    // Add copy
    map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?;
    assert_eq!(map.copy_map_len(), 1);

    // Copy override: replacing an existing copy source must not bump the
    // counter
    map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?;
    assert_eq!(map.copy_map_len(), 1);

    // Multiple copies
    map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?;
    assert_eq!(map.copy_map_len(), 2);

    map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?;
    assert_eq!(map.copy_map_len(), 3);

    // Added, so the entry is completely removed
    map.set_untracked(p(b"files/added"))?;
    assert_does_not_exist(&map, b"files/added");
    assert_eq!(map.copy_map_len(), 2);

    // Removed, so the entry is kept around, so is its copy
    map.set_untracked(p(b"removed"))?;
    assert!(map.get(p(b"removed"))?.is_some());
    assert_eq!(map.copy_map_len(), 2);

    // Clean, so the entry is kept around, but not its copy
    map.set_untracked(p(b"files/clean"))?;
    assert!(map.get(p(b"files/clean"))?.is_some());
    assert_eq!(map.copy_map_len(), 1);

    map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?;
    assert_eq!(map.copy_map_len(), 2);

    // Info from p2, so its copy source info is kept around
    map.set_untracked(p(b"files/from_p2"))?;
    assert!(map.get(p(b"files/from_p2"))?.is_some());
    assert_eq!(map.copy_map_len(), 2);

    Ok(())
}
1699 1719
/// Test with "on disk" data. For the sake of this test, the "on disk" data
/// does not actually come from the disk, but it's opaque to the code being
/// tested.
///
/// A map is built in memory, serialized with `pack_v2`, read back with
/// `OwningDirstateMap::new_v2`, and then mutated step by step while
/// checking `unreachable_bytes`, which nodes switched from their on-disk
/// to their in-memory representation, and `write_should_append()`.
#[test]
fn test_on_disk() -> Result<(), DirstateError> {
    // First let's create some data to put "on disk"
    let mut map = OwningDirstateMap::new_empty(vec![]);

    // A file that was just added
    map.set_tracked(p(b"some/nested/added"))?;
    map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?;

    // A file that was removed
    map.reset_state(
        p(b"some/nested/removed"),
        false,
        true,
        false,
        false,
        None,
    )?;
    // Only present in p2
    map.reset_state(
        p(b"other/p2_info_only"),
        false,
        false,
        true,
        false,
        None,
    )?;
    map.copy_map_insert(
        p(b"other/p2_info_only"),
        p(b"other/p2_info_copy_source"),
    )?;
    // A file that was merged
    map.reset_state(p(b"merged"), true, true, true, false, None)?;
    // A file that is added, with info from p2
    // XXX is that actually possible?
    map.reset_state(
        p(b"other/added_with_p2"),
        true,
        false,
        true,
        false,
        None,
    )?;
    // One layer without any files to test deletion cascade
    // A clean file
    map.reset_state(
        p(b"some/other/nested/clean"),
        true,
        true,
        false,
        false,
        None,
    )?;

    // Serialize to a fresh data "file" (no append)
    let (packed, metadata, _should_append) = map.pack_v2(false)?;
    let packed_len = packed.len();
    assert!(packed_len > 0);

    // Recreate "from disk"
    let mut map = OwningDirstateMap::new_v2(
        packed,
        packed_len,
        metadata.as_bytes(),
    )?;

    // Check that everything is accounted for
    assert!(map.contains_key(p(b"some/nested/added"))?);
    assert!(map.contains_key(p(b"some/nested/removed"))?);
    assert!(map.contains_key(p(b"merged"))?);
    assert!(map.contains_key(p(b"other/p2_info_only"))?);
    assert!(map.contains_key(p(b"other/added_with_p2"))?);
    assert!(map.contains_key(p(b"some/other/nested/clean"))?);
    assert_eq!(
        map.copy_map_get(p(b"some/nested/added"))?,
        Some(p(b"added_copy_source"))
    );
    assert_eq!(
        map.copy_map_get(p(b"other/p2_info_only"))?,
        Some(p(b"other/p2_info_copy_source"))
    );
    assert_eq!(tracked_descendants(&map, b"some"), 2);
    assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
    assert_eq!(tracked_descendants(&map, b"other"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
    assert_eq!(map.len(), 6);
    assert_eq!(map.get_map().unreachable_bytes, 0);
    assert_eq!(map.copy_map_len(), 2);

    // Shouldn't change anything since it's already not tracked
    map.set_untracked(p(b"some/nested/removed"))?;
    assert_eq!(map.get_map().unreachable_bytes, 0);

    match map.get_map().root {
        ChildNodes::InMemory(_) => {
            panic!("root should not have been mutated")
        }
        _ => (),
    }
    // We haven't mutated enough (nothing, actually), we should still be in
    // the append strategy
    assert!(map.get_map().write_should_append());

    // But this mutates the structure, so there should be unreachable_bytes
    assert!(map.set_untracked(p(b"some/nested/added"))?);
    let unreachable_bytes = map.get_map().unreachable_bytes;
    assert!(unreachable_bytes > 0);

    match map.get_map().root {
        ChildNodes::OnDisk(_) => panic!("root should have been mutated"),
        _ => (),
    }

    // This should not mutate the structure either, since `root` has
    // already been mutated along with its direct children.
    map.set_untracked(p(b"merged"))?;
    assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);

    match map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() {
        NodeRef::InMemory(_, _) => {
            panic!("'other/added_with_p2' should not have been mutated")
        }
        _ => (),
    }
    // But this should, since it's in a different path
    // than `<root>some/nested/add`
    map.set_untracked(p(b"other/added_with_p2"))?;
    assert!(map.get_map().unreachable_bytes > unreachable_bytes);

    match map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() {
        NodeRef::OnDisk(_) => {
            panic!("'other/added_with_p2' should have been mutated")
        }
        _ => (),
    }

    // We have rewritten most of the tree, we should create a new file
    assert!(!map.get_map().write_should_append());

    Ok(())
}
1849 1869 }
@@ -1,849 +1,846 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 7 use crate::dirstate_tree::dirstate_map::NodeData;
8 8 use crate::dirstate_tree::dirstate_map::NodeRef;
9 9 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 10 use crate::matchers::get_ignore_function;
11 11 use crate::matchers::Matcher;
12 12 use crate::utils::files::get_bytes_from_os_string;
13 13 use crate::utils::files::get_path_from_bytes;
14 14 use crate::utils::hg_path::HgPath;
15 15 use crate::BadMatch;
16 16 use crate::DirstateStatus;
17 17 use crate::EntryState;
18 18 use crate::HgPathBuf;
19 19 use crate::HgPathCow;
20 20 use crate::PatternFileWarning;
21 21 use crate::StatusError;
22 22 use crate::StatusOptions;
23 23 use micro_timer::timed;
24 24 use rayon::prelude::*;
25 25 use sha1::{Digest, Sha1};
26 26 use std::borrow::Cow;
27 27 use std::io;
28 28 use std::path::Path;
29 29 use std::path::PathBuf;
30 30 use std::sync::Mutex;
31 31 use std::time::SystemTime;
32 32
33 33 /// Returns the status of the working directory compared to its parent
34 34 /// changeset.
35 35 ///
36 36 /// This algorithm is based on traversing the filesystem tree (`fs` in function
37 37 /// and variable names) and dirstate tree at the same time. The core of this
38 38 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
39 39 /// and its use of `itertools::merge_join_by`. When reaching a path that only
40 40 /// exists in one of the two trees, depending on information requested by
41 41 /// `options` we may need to traverse the remaining subtree.
42 42 #[timed]
43 43 pub fn status<'dirstate>(
44 44 dmap: &'dirstate mut DirstateMap,
45 45 matcher: &(dyn Matcher + Sync),
46 46 root_dir: PathBuf,
47 47 ignore_files: Vec<PathBuf>,
48 48 options: StatusOptions,
49 49 ) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
50 50 {
51 51 // Force the global rayon threadpool to not exceed 16 concurrent threads.
52 52 // This is a stop-gap measure until we figure out why using more than 16
53 53 // threads makes `status` slower for each additional thread.
54 54 // We use `ok()` in case the global threadpool has already been
55 55 // instantiated in `rhg` or some other caller.
56 56 // TODO find the underlying cause and fix it, then remove this.
57 57 rayon::ThreadPoolBuilder::new()
58 58 .num_threads(16)
59 59 .build_global()
60 60 .ok();
61 61
62 62 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
63 63 if options.list_ignored || options.list_unknown {
64 64 let mut hasher = Sha1::new();
65 65 let (ignore_fn, warnings) = get_ignore_function(
66 66 ignore_files,
67 67 &root_dir,
68 68 &mut |pattern_bytes| hasher.update(pattern_bytes),
69 69 )?;
70 70 let new_hash = *hasher.finalize().as_ref();
71 71 let changed = new_hash != dmap.ignore_patterns_hash;
72 72 dmap.ignore_patterns_hash = new_hash;
73 73 (ignore_fn, warnings, Some(changed))
74 74 } else {
75 75 (Box::new(|&_| true), vec![], None)
76 76 };
77 77
78 78 let filesystem_time_at_status_start =
79 79 filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
80 80
81 81 // If the repository is under the current directory, prefer using a
82 82 // relative path, so the kernel needs to traverse fewer directory in every
83 83 // call to `read_dir` or `symlink_metadata`.
84 84 // This is effective in the common case where the current directory is the
85 85 // repository root.
86 86
87 87 // TODO: Better yet would be to use libc functions like `openat` and
88 88 // `fstatat` to remove such repeated traversals entirely, but the standard
89 89 // library does not provide APIs based on those.
90 90 // Maybe with a crate like https://crates.io/crates/openat instead?
91 91 let root_dir = if let Some(relative) = std::env::current_dir()
92 92 .ok()
93 93 .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
94 94 {
95 95 relative
96 96 } else {
97 97 &root_dir
98 98 };
99 99
100 100 let outcome = DirstateStatus {
101 101 filesystem_time_at_status_start,
102 102 ..Default::default()
103 103 };
104 104 let common = StatusCommon {
105 105 dmap,
106 106 options,
107 107 matcher,
108 108 ignore_fn,
109 109 outcome: Mutex::new(outcome),
110 110 ignore_patterns_have_changed: patterns_changed,
111 111 new_cachable_directories: Default::default(),
112 112 outated_cached_directories: Default::default(),
113 113 filesystem_time_at_status_start,
114 114 };
115 115 let is_at_repo_root = true;
116 116 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
117 117 let has_ignored_ancestor = false;
118 118 let root_cached_mtime = None;
119 119 let root_dir_metadata = None;
120 120 // If the path we have for the repository root is a symlink, do follow it.
121 121 // (As opposed to symlinks within the working directory which are not
122 122 // followed, using `std::fs::symlink_metadata`.)
123 123 common.traverse_fs_directory_and_dirstate(
124 124 has_ignored_ancestor,
125 125 dmap.root.as_ref(),
126 126 hg_path,
127 127 &root_dir,
128 128 root_dir_metadata,
129 129 root_cached_mtime,
130 130 is_at_repo_root,
131 131 )?;
132 132 let mut outcome = common.outcome.into_inner().unwrap();
133 133 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
134 134 let outdated = common.outated_cached_directories.into_inner().unwrap();
135 135
136 136 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
137 137 || !outdated.is_empty()
138 138 || !new_cachable.is_empty();
139 139
140 140 // Remove outdated mtimes before adding new mtimes, in case a given
141 141 // directory is both
142 142 for path in &outdated {
143 let node = dmap.get_or_insert(path)?;
144 if let NodeData::CachedDirectory { .. } = &node.data {
145 node.data = NodeData::None
146 }
143 dmap.clear_cached_mtime(path)?;
147 144 }
148 145 for (path, mtime) in &new_cachable {
149 146 let node = dmap.get_or_insert(path)?;
150 147 match &node.data {
151 148 NodeData::Entry(_) => {} // Don’t overwrite an entry
152 149 NodeData::CachedDirectory { .. } | NodeData::None => {
153 150 node.data = NodeData::CachedDirectory { mtime: *mtime }
154 151 }
155 152 }
156 153 }
157 154
158 155 Ok((outcome, warnings))
159 156 }
160 157
161 158 /// Bag of random things needed by various parts of the algorithm. Reduces the
162 159 /// number of parameters passed to functions.
163 160 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
164 161 dmap: &'tree DirstateMap<'on_disk>,
165 162 options: StatusOptions,
166 163 matcher: &'a (dyn Matcher + Sync),
167 164 ignore_fn: IgnoreFnType<'a>,
168 165 outcome: Mutex<DirstateStatus<'on_disk>>,
169 166 new_cachable_directories:
170 167 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
171 168 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
172 169
173 170 /// Whether ignore files like `.hgignore` have changed since the previous
174 171 /// time a `status()` call wrote their hash to the dirstate. `None` means
175 172 /// we don’t know as this run doesn’t list either ignored or uknown files
176 173 /// and therefore isn’t reading `.hgignore`.
177 174 ignore_patterns_have_changed: Option<bool>,
178 175
179 176 /// The current time at the start of the `status()` algorithm, as measured
180 177 /// and possibly truncated by the filesystem.
181 178 filesystem_time_at_status_start: Option<TruncatedTimestamp>,
182 179 }
183 180
/// Selects which list of the `DirstateStatus` under construction a path is
/// appended to (see `push_outcome_common`).
enum Outcome {
    /// Goes to `DirstateStatus::modified`
    Modified,
    /// Goes to `DirstateStatus::added`
    Added,
    /// Goes to `DirstateStatus::removed`
    Removed,
    /// Goes to `DirstateStatus::deleted`
    Deleted,
    /// Goes to `DirstateStatus::clean`
    Clean,
    /// Goes to `DirstateStatus::ignored`
    Ignored,
    /// Goes to `DirstateStatus::unknown`
    Unknown,
    /// Goes to `DirstateStatus::unsure`
    Unsure,
}
194 191
195 192 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
196 193 fn push_outcome(
197 194 &self,
198 195 which: Outcome,
199 196 dirstate_node: &NodeRef<'tree, 'on_disk>,
200 197 ) -> Result<(), DirstateV2ParseError> {
201 198 let path = dirstate_node
202 199 .full_path_borrowed(self.dmap.on_disk)?
203 200 .detach_from_tree();
204 201 let copy_source = if self.options.list_copies {
205 202 dirstate_node
206 203 .copy_source_borrowed(self.dmap.on_disk)?
207 204 .map(|source| source.detach_from_tree())
208 205 } else {
209 206 None
210 207 };
211 208 self.push_outcome_common(which, path, copy_source);
212 209 Ok(())
213 210 }
214 211
215 212 fn push_outcome_without_copy_source(
216 213 &self,
217 214 which: Outcome,
218 215 path: &BorrowedPath<'_, 'on_disk>,
219 216 ) {
220 217 self.push_outcome_common(which, path.detach_from_tree(), None)
221 218 }
222 219
223 220 fn push_outcome_common(
224 221 &self,
225 222 which: Outcome,
226 223 path: HgPathCow<'on_disk>,
227 224 copy_source: Option<HgPathCow<'on_disk>>,
228 225 ) {
229 226 let mut outcome = self.outcome.lock().unwrap();
230 227 let vec = match which {
231 228 Outcome::Modified => &mut outcome.modified,
232 229 Outcome::Added => &mut outcome.added,
233 230 Outcome::Removed => &mut outcome.removed,
234 231 Outcome::Deleted => &mut outcome.deleted,
235 232 Outcome::Clean => &mut outcome.clean,
236 233 Outcome::Ignored => &mut outcome.ignored,
237 234 Outcome::Unknown => &mut outcome.unknown,
238 235 Outcome::Unsure => &mut outcome.unsure,
239 236 };
240 237 vec.push(StatusPath { path, copy_source });
241 238 }
242 239
243 240 fn read_dir(
244 241 &self,
245 242 hg_path: &HgPath,
246 243 fs_path: &Path,
247 244 is_at_repo_root: bool,
248 245 ) -> Result<Vec<DirEntry>, ()> {
249 246 DirEntry::read_dir(fs_path, is_at_repo_root)
250 247 .map_err(|error| self.io_error(error, hg_path))
251 248 }
252 249
253 250 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
254 251 let errno = error.raw_os_error().expect("expected real OS error");
255 252 self.outcome
256 253 .lock()
257 254 .unwrap()
258 255 .bad
259 256 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
260 257 }
261 258
262 259 fn check_for_outdated_directory_cache(
263 260 &self,
264 261 dirstate_node: &NodeRef<'tree, 'on_disk>,
265 262 ) -> Result<(), DirstateV2ParseError> {
266 263 if self.ignore_patterns_have_changed == Some(true)
267 264 && dirstate_node.cached_directory_mtime()?.is_some()
268 265 {
269 266 self.outated_cached_directories.lock().unwrap().push(
270 267 dirstate_node
271 268 .full_path_borrowed(self.dmap.on_disk)?
272 269 .detach_from_tree(),
273 270 )
274 271 }
275 272 Ok(())
276 273 }
277 274
278 275 /// If this returns true, we can get accurate results by only using
279 276 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
280 277 /// need to call `read_dir`.
281 278 fn can_skip_fs_readdir(
282 279 &self,
283 280 directory_metadata: Option<&std::fs::Metadata>,
284 281 cached_directory_mtime: Option<TruncatedTimestamp>,
285 282 ) -> bool {
286 283 if !self.options.list_unknown && !self.options.list_ignored {
287 284 // All states that we care about listing have corresponding
288 285 // dirstate entries.
289 286 // This happens for example with `hg status -mard`.
290 287 return true;
291 288 }
292 289 if !self.options.list_ignored
293 290 && self.ignore_patterns_have_changed == Some(false)
294 291 {
295 292 if let Some(cached_mtime) = cached_directory_mtime {
296 293 // The dirstate contains a cached mtime for this directory, set
297 294 // by a previous run of the `status` algorithm which found this
298 295 // directory eligible for `read_dir` caching.
299 296 if let Some(meta) = directory_metadata {
300 297 if cached_mtime
301 298 .likely_equal_to_mtime_of(meta)
302 299 .unwrap_or(false)
303 300 {
304 301 // The mtime of that directory has not changed
305 302 // since then, which means that the results of
306 303 // `read_dir` should also be unchanged.
307 304 return true;
308 305 }
309 306 }
310 307 }
311 308 }
312 309 false
313 310 }
314 311
315 312 /// Returns whether all child entries of the filesystem directory have a
316 313 /// corresponding dirstate node or are ignored.
317 314 fn traverse_fs_directory_and_dirstate(
318 315 &self,
319 316 has_ignored_ancestor: bool,
320 317 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
321 318 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
322 319 directory_fs_path: &Path,
323 320 directory_metadata: Option<&std::fs::Metadata>,
324 321 cached_directory_mtime: Option<TruncatedTimestamp>,
325 322 is_at_repo_root: bool,
326 323 ) -> Result<bool, DirstateV2ParseError> {
327 324 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
328 325 {
329 326 dirstate_nodes
330 327 .par_iter()
331 328 .map(|dirstate_node| {
332 329 let fs_path = directory_fs_path.join(get_path_from_bytes(
333 330 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
334 331 ));
335 332 match std::fs::symlink_metadata(&fs_path) {
336 333 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
337 334 &fs_path,
338 335 &fs_metadata,
339 336 dirstate_node,
340 337 has_ignored_ancestor,
341 338 ),
342 339 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
343 340 self.traverse_dirstate_only(dirstate_node)
344 341 }
345 342 Err(error) => {
346 343 let hg_path =
347 344 dirstate_node.full_path(self.dmap.on_disk)?;
348 345 Ok(self.io_error(error, hg_path))
349 346 }
350 347 }
351 348 })
352 349 .collect::<Result<_, _>>()?;
353 350
354 351 // We don’t know, so conservatively say this isn’t the case
355 352 let children_all_have_dirstate_node_or_are_ignored = false;
356 353
357 354 return Ok(children_all_have_dirstate_node_or_are_ignored);
358 355 }
359 356
360 357 let mut fs_entries = if let Ok(entries) = self.read_dir(
361 358 directory_hg_path,
362 359 directory_fs_path,
363 360 is_at_repo_root,
364 361 ) {
365 362 entries
366 363 } else {
367 364 // Treat an unreadable directory (typically because of insufficient
368 365 // permissions) like an empty directory. `self.read_dir` has
369 366 // already called `self.io_error` so a warning will be emitted.
370 367 Vec::new()
371 368 };
372 369
373 370 // `merge_join_by` requires both its input iterators to be sorted:
374 371
375 372 let dirstate_nodes = dirstate_nodes.sorted();
376 373 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
377 374 // https://github.com/rust-lang/rust/issues/34162
378 375 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
379 376
380 377 // Propagate here any error that would happen inside the comparison
381 378 // callback below
382 379 for dirstate_node in &dirstate_nodes {
383 380 dirstate_node.base_name(self.dmap.on_disk)?;
384 381 }
385 382 itertools::merge_join_by(
386 383 dirstate_nodes,
387 384 &fs_entries,
388 385 |dirstate_node, fs_entry| {
389 386 // This `unwrap` never panics because we already propagated
390 387 // those errors above
391 388 dirstate_node
392 389 .base_name(self.dmap.on_disk)
393 390 .unwrap()
394 391 .cmp(&fs_entry.base_name)
395 392 },
396 393 )
397 394 .par_bridge()
398 395 .map(|pair| {
399 396 use itertools::EitherOrBoth::*;
400 397 let has_dirstate_node_or_is_ignored;
401 398 match pair {
402 399 Both(dirstate_node, fs_entry) => {
403 400 self.traverse_fs_and_dirstate(
404 401 &fs_entry.full_path,
405 402 &fs_entry.metadata,
406 403 dirstate_node,
407 404 has_ignored_ancestor,
408 405 )?;
409 406 has_dirstate_node_or_is_ignored = true
410 407 }
411 408 Left(dirstate_node) => {
412 409 self.traverse_dirstate_only(dirstate_node)?;
413 410 has_dirstate_node_or_is_ignored = true;
414 411 }
415 412 Right(fs_entry) => {
416 413 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
417 414 has_ignored_ancestor,
418 415 directory_hg_path,
419 416 fs_entry,
420 417 )
421 418 }
422 419 }
423 420 Ok(has_dirstate_node_or_is_ignored)
424 421 })
425 422 .try_reduce(|| true, |a, b| Ok(a && b))
426 423 }
427 424
428 425 fn traverse_fs_and_dirstate(
429 426 &self,
430 427 fs_path: &Path,
431 428 fs_metadata: &std::fs::Metadata,
432 429 dirstate_node: NodeRef<'tree, 'on_disk>,
433 430 has_ignored_ancestor: bool,
434 431 ) -> Result<(), DirstateV2ParseError> {
435 432 self.check_for_outdated_directory_cache(&dirstate_node)?;
436 433 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
437 434 let file_type = fs_metadata.file_type();
438 435 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
439 436 if !file_or_symlink {
440 437 // If we previously had a file here, it was removed (with
441 438 // `hg rm` or similar) or deleted before it could be
442 439 // replaced by a directory or something else.
443 440 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
444 441 }
445 442 if file_type.is_dir() {
446 443 if self.options.collect_traversed_dirs {
447 444 self.outcome
448 445 .lock()
449 446 .unwrap()
450 447 .traversed
451 448 .push(hg_path.detach_from_tree())
452 449 }
453 450 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
454 451 let is_at_repo_root = false;
455 452 let children_all_have_dirstate_node_or_are_ignored = self
456 453 .traverse_fs_directory_and_dirstate(
457 454 is_ignored,
458 455 dirstate_node.children(self.dmap.on_disk)?,
459 456 hg_path,
460 457 fs_path,
461 458 Some(fs_metadata),
462 459 dirstate_node.cached_directory_mtime()?,
463 460 is_at_repo_root,
464 461 )?;
465 462 self.maybe_save_directory_mtime(
466 463 children_all_have_dirstate_node_or_are_ignored,
467 464 fs_metadata,
468 465 dirstate_node,
469 466 )?
470 467 } else {
471 468 if file_or_symlink && self.matcher.matches(hg_path) {
472 469 if let Some(state) = dirstate_node.state()? {
473 470 match state {
474 471 EntryState::Added => {
475 472 self.push_outcome(Outcome::Added, &dirstate_node)?
476 473 }
477 474 EntryState::Removed => self
478 475 .push_outcome(Outcome::Removed, &dirstate_node)?,
479 476 EntryState::Merged => self
480 477 .push_outcome(Outcome::Modified, &dirstate_node)?,
481 478 EntryState::Normal => self
482 479 .handle_normal_file(&dirstate_node, fs_metadata)?,
483 480 }
484 481 } else {
485 482 // `node.entry.is_none()` indicates a "directory"
486 483 // node, but the filesystem has a file
487 484 self.mark_unknown_or_ignored(
488 485 has_ignored_ancestor,
489 486 hg_path,
490 487 );
491 488 }
492 489 }
493 490
494 491 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
495 492 {
496 493 self.traverse_dirstate_only(child_node)?
497 494 }
498 495 }
499 496 Ok(())
500 497 }
501 498
502 499 fn maybe_save_directory_mtime(
503 500 &self,
504 501 children_all_have_dirstate_node_or_are_ignored: bool,
505 502 directory_metadata: &std::fs::Metadata,
506 503 dirstate_node: NodeRef<'tree, 'on_disk>,
507 504 ) -> Result<(), DirstateV2ParseError> {
508 505 if !children_all_have_dirstate_node_or_are_ignored {
509 506 return Ok(());
510 507 }
511 508 // All filesystem directory entries from `read_dir` have a
512 509 // corresponding node in the dirstate, so we can reconstitute the
513 510 // names of those entries without calling `read_dir` again.
514 511
515 512 // TODO: use let-else here and below when available:
516 513 // https://github.com/rust-lang/rust/issues/87335
517 514 let status_start = if let Some(status_start) =
518 515 &self.filesystem_time_at_status_start
519 516 {
520 517 status_start
521 518 } else {
522 519 return Ok(());
523 520 };
524 521
525 522 // Although the Rust standard library’s `SystemTime` type
526 523 // has nanosecond precision, the times reported for a
527 524 // directory’s (or file’s) modified time may have lower
528 525 // resolution based on the filesystem (for example ext3
529 526 // only stores integer seconds), kernel (see
530 527 // https://stackoverflow.com/a/14393315/1162888), etc.
531 528 let directory_mtime = if let Ok(option) =
532 529 TruncatedTimestamp::for_reliable_mtime_of(
533 530 directory_metadata,
534 531 status_start,
535 532 ) {
536 533 if let Some(directory_mtime) = option {
537 534 directory_mtime
538 535 } else {
539 536 // The directory was modified too recently,
540 537 // don’t cache its `read_dir` results.
541 538 //
542 539 // 1. A change to this directory (direct child was
543 540 // added or removed) cause its mtime to be set
544 541 // (possibly truncated) to `directory_mtime`
545 542 // 2. This `status` algorithm calls `read_dir`
546 543 // 3. An other change is made to the same directory is
547 544 // made so that calling `read_dir` agin would give
548 545 // different results, but soon enough after 1. that
549 546 // the mtime stays the same
550 547 //
551 548 // On a system where the time resolution poor, this
552 549 // scenario is not unlikely if all three steps are caused
553 550 // by the same script.
554 551 return Ok(());
555 552 }
556 553 } else {
557 554 // OS/libc does not support mtime?
558 555 return Ok(());
559 556 };
560 557 // We’ve observed (through `status_start`) that time has
561 558 // “progressed” since `directory_mtime`, so any further
562 559 // change to this directory is extremely likely to cause a
563 560 // different mtime.
564 561 //
565 562 // Having the same mtime again is not entirely impossible
566 563 // since the system clock is not monotonous. It could jump
567 564 // backward to some point before `directory_mtime`, then a
568 565 // directory change could potentially happen during exactly
569 566 // the wrong tick.
570 567 //
571 568 // We deem this scenario (unlike the previous one) to be
572 569 // unlikely enough in practice.
573 570
574 571 let is_up_to_date =
575 572 if let Some(cached) = dirstate_node.cached_directory_mtime()? {
576 573 cached.likely_equal(directory_mtime)
577 574 } else {
578 575 false
579 576 };
580 577 if !is_up_to_date {
581 578 let hg_path = dirstate_node
582 579 .full_path_borrowed(self.dmap.on_disk)?
583 580 .detach_from_tree();
584 581 self.new_cachable_directories
585 582 .lock()
586 583 .unwrap()
587 584 .push((hg_path, directory_mtime))
588 585 }
589 586 Ok(())
590 587 }
591 588
592 589 /// A file with `EntryState::Normal` in the dirstate was found in the
593 590 /// filesystem
594 591 fn handle_normal_file(
595 592 &self,
596 593 dirstate_node: &NodeRef<'tree, 'on_disk>,
597 594 fs_metadata: &std::fs::Metadata,
598 595 ) -> Result<(), DirstateV2ParseError> {
599 596 // Keep the low 31 bits
600 597 fn truncate_u64(value: u64) -> i32 {
601 598 (value & 0x7FFF_FFFF) as i32
602 599 }
603 600
604 601 let entry = dirstate_node
605 602 .entry()?
606 603 .expect("handle_normal_file called with entry-less node");
607 604 let mode_changed =
608 605 || self.options.check_exec && entry.mode_changed(fs_metadata);
609 606 let size = entry.size();
610 607 let size_changed = size != truncate_u64(fs_metadata.len());
611 608 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
612 609 // issue6456: Size returned may be longer due to encryption
613 610 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
614 611 self.push_outcome(Outcome::Unsure, dirstate_node)?
615 612 } else if dirstate_node.has_copy_source()
616 613 || entry.is_from_other_parent()
617 614 || (size >= 0 && (size_changed || mode_changed()))
618 615 {
619 616 self.push_outcome(Outcome::Modified, dirstate_node)?
620 617 } else {
621 618 let mtime_looks_clean;
622 619 if let Some(dirstate_mtime) = entry.truncated_mtime() {
623 620 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
624 621 .expect("OS/libc does not support mtime?");
625 622 // There might be a change in the future if for example the
626 623 // internal clock become off while process run, but this is a
627 624 // case where the issues the user would face
628 625 // would be a lot worse and there is nothing we
629 626 // can really do.
630 627 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
631 628 } else {
632 629 // No mtime in the dirstate entry
633 630 mtime_looks_clean = false
634 631 };
635 632 if !mtime_looks_clean {
636 633 self.push_outcome(Outcome::Unsure, dirstate_node)?
637 634 } else if self.options.list_clean {
638 635 self.push_outcome(Outcome::Clean, dirstate_node)?
639 636 }
640 637 }
641 638 Ok(())
642 639 }
643 640
644 641 /// A node in the dirstate tree has no corresponding filesystem entry
645 642 fn traverse_dirstate_only(
646 643 &self,
647 644 dirstate_node: NodeRef<'tree, 'on_disk>,
648 645 ) -> Result<(), DirstateV2ParseError> {
649 646 self.check_for_outdated_directory_cache(&dirstate_node)?;
650 647 self.mark_removed_or_deleted_if_file(&dirstate_node)?;
651 648 dirstate_node
652 649 .children(self.dmap.on_disk)?
653 650 .par_iter()
654 651 .map(|child_node| self.traverse_dirstate_only(child_node))
655 652 .collect()
656 653 }
657 654
658 655 /// A node in the dirstate tree has no corresponding *file* on the
659 656 /// filesystem
660 657 ///
661 658 /// Does nothing on a "directory" node
662 659 fn mark_removed_or_deleted_if_file(
663 660 &self,
664 661 dirstate_node: &NodeRef<'tree, 'on_disk>,
665 662 ) -> Result<(), DirstateV2ParseError> {
666 663 if let Some(state) = dirstate_node.state()? {
667 664 let path = dirstate_node.full_path(self.dmap.on_disk)?;
668 665 if self.matcher.matches(path) {
669 666 if let EntryState::Removed = state {
670 667 self.push_outcome(Outcome::Removed, dirstate_node)?
671 668 } else {
672 669 self.push_outcome(Outcome::Deleted, &dirstate_node)?
673 670 }
674 671 }
675 672 }
676 673 Ok(())
677 674 }
678 675
679 676 /// Something in the filesystem has no corresponding dirstate node
680 677 ///
681 678 /// Returns whether that path is ignored
682 679 fn traverse_fs_only(
683 680 &self,
684 681 has_ignored_ancestor: bool,
685 682 directory_hg_path: &HgPath,
686 683 fs_entry: &DirEntry,
687 684 ) -> bool {
688 685 let hg_path = directory_hg_path.join(&fs_entry.base_name);
689 686 let file_type = fs_entry.metadata.file_type();
690 687 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
691 688 if file_type.is_dir() {
692 689 let is_ignored =
693 690 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
694 691 let traverse_children = if is_ignored {
695 692 // Descendants of an ignored directory are all ignored
696 693 self.options.list_ignored
697 694 } else {
698 695 // Descendants of an unknown directory may be either unknown or
699 696 // ignored
700 697 self.options.list_unknown || self.options.list_ignored
701 698 };
702 699 if traverse_children {
703 700 let is_at_repo_root = false;
704 701 if let Ok(children_fs_entries) = self.read_dir(
705 702 &hg_path,
706 703 &fs_entry.full_path,
707 704 is_at_repo_root,
708 705 ) {
709 706 children_fs_entries.par_iter().for_each(|child_fs_entry| {
710 707 self.traverse_fs_only(
711 708 is_ignored,
712 709 &hg_path,
713 710 child_fs_entry,
714 711 );
715 712 })
716 713 }
717 714 }
718 715 if self.options.collect_traversed_dirs {
719 716 self.outcome.lock().unwrap().traversed.push(hg_path.into())
720 717 }
721 718 is_ignored
722 719 } else {
723 720 if file_or_symlink {
724 721 if self.matcher.matches(&hg_path) {
725 722 self.mark_unknown_or_ignored(
726 723 has_ignored_ancestor,
727 724 &BorrowedPath::InMemory(&hg_path),
728 725 )
729 726 } else {
730 727 // We haven’t computed whether this path is ignored. It
731 728 // might not be, and a future run of status might have a
732 729 // different matcher that matches it. So treat it as not
733 730 // ignored. That is, inhibit readdir caching of the parent
734 731 // directory.
735 732 false
736 733 }
737 734 } else {
738 735 // This is neither a directory, a plain file, or a symlink.
739 736 // Treat it like an ignored file.
740 737 true
741 738 }
742 739 }
743 740 }
744 741
745 742 /// Returns whether that path is ignored
746 743 fn mark_unknown_or_ignored(
747 744 &self,
748 745 has_ignored_ancestor: bool,
749 746 hg_path: &BorrowedPath<'_, 'on_disk>,
750 747 ) -> bool {
751 748 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
752 749 if is_ignored {
753 750 if self.options.list_ignored {
754 751 self.push_outcome_without_copy_source(
755 752 Outcome::Ignored,
756 753 hg_path,
757 754 )
758 755 }
759 756 } else {
760 757 if self.options.list_unknown {
761 758 self.push_outcome_without_copy_source(
762 759 Outcome::Unknown,
763 760 hg_path,
764 761 )
765 762 }
766 763 }
767 764 is_ignored
768 765 }
769 766 }
770 767
771 768 struct DirEntry {
772 769 base_name: HgPathBuf,
773 770 full_path: PathBuf,
774 771 metadata: std::fs::Metadata,
775 772 }
776 773
777 774 impl DirEntry {
778 775 /// Returns **unsorted** entries in the given directory, with name and
779 776 /// metadata.
780 777 ///
781 778 /// If a `.hg` sub-directory is encountered:
782 779 ///
783 780 /// * At the repository root, ignore that sub-directory
784 781 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
785 782 /// list instead.
786 783 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
787 784 // `read_dir` returns a "not found" error for the empty path
788 785 let at_cwd = path == Path::new("");
789 786 let read_dir_path = if at_cwd { Path::new(".") } else { path };
790 787 let mut results = Vec::new();
791 788 for entry in read_dir_path.read_dir()? {
792 789 let entry = entry?;
793 790 let metadata = match entry.metadata() {
794 791 Ok(v) => v,
795 792 Err(e) => {
796 793 // race with file deletion?
797 794 if e.kind() == std::io::ErrorKind::NotFound {
798 795 continue;
799 796 } else {
800 797 return Err(e);
801 798 }
802 799 }
803 800 };
804 801 let file_name = entry.file_name();
805 802 // FIXME don't do this when cached
806 803 if file_name == ".hg" {
807 804 if is_at_repo_root {
808 805 // Skip the repo’s own .hg (might be a symlink)
809 806 continue;
810 807 } else if metadata.is_dir() {
811 808 // A .hg sub-directory at another location means a subrepo,
812 809 // skip it entirely.
813 810 return Ok(Vec::new());
814 811 }
815 812 }
816 813 let full_path = if at_cwd {
817 814 file_name.clone().into()
818 815 } else {
819 816 entry.path()
820 817 };
821 818 let base_name = get_bytes_from_os_string(file_name).into();
822 819 results.push(DirEntry {
823 820 base_name,
824 821 full_path,
825 822 metadata,
826 823 })
827 824 }
828 825 Ok(results)
829 826 }
830 827 }
831 828
832 829 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
833 830 /// of the give repository.
834 831 ///
835 832 /// This is similar to `SystemTime::now()`, with the result truncated to the
836 833 /// same time resolution as other files’ modification times. Using `.hg`
837 834 /// instead of the system’s default temporary directory (such as `/tmp`) makes
838 835 /// it more likely the temporary file is in the same disk partition as contents
839 836 /// of the working directory, which can matter since different filesystems may
840 837 /// store timestamps with different resolutions.
841 838 ///
842 839 /// This may fail, typically if we lack write permissions. In that case we
843 840 /// should continue the `status()` algoritm anyway and consider the current
844 841 /// date/time to be unknown.
845 842 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
846 843 tempfile::tempfile_in(repo_root.join(".hg"))?
847 844 .metadata()?
848 845 .modified()
849 846 }
General Comments 0
You need to be logged in to leave comments. Login now