##// END OF EJS Templates
rust-status: don't trigger dirstate v1 rewrite when only v2 data is changed...
Raphaël Gomès -
r50232:6cd24955 stable
parent child Browse files
Show More
@@ -1,1203 +1,1213 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::path::PathBuf;
5 5
6 6 use super::on_disk;
7 7 use super::on_disk::DirstateV2ParseError;
8 8 use super::owning::OwningDirstateMap;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::CopyMapIter;
14 14 use crate::dirstate::StateMapIter;
15 15 use crate::dirstate::TruncatedTimestamp;
16 16 use crate::dirstate::SIZE_FROM_OTHER_PARENT;
17 17 use crate::dirstate::SIZE_NON_NORMAL;
18 18 use crate::matchers::Matcher;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::DirstateEntry;
21 21 use crate::DirstateError;
22 22 use crate::DirstateParents;
23 23 use crate::DirstateStatus;
24 24 use crate::EntryState;
25 25 use crate::FastHashMap;
26 26 use crate::PatternFileWarning;
27 27 use crate::StatusError;
28 28 use crate::StatusOptions;
29 29
/// Append to an existing data file if the amount of unreachable data (not used
/// anymore) is less than this fraction of the total amount of existing data.
/// Above this ratio, a fresh data file is written instead (see
/// `write_should_append`).
const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
33 33
/// Version of the on-disk format
///
/// A fieldless two-variant enum: derive `Copy`/`Clone` so it can be passed
/// and compared by value without borrowing.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DirstateVersion {
    V1,
    V2,
}
40
/// In-memory representation of the dirstate: a tree of nodes mirroring the
/// repository's directory hierarchy, lazily expanded from `on_disk` bytes.
pub struct DirstateMap<'on_disk> {
    /// Contents of the `.hg/dirstate` file
    pub(super) on_disk: &'on_disk [u8],

    /// Root of the tree of nodes (one node per path component).
    pub(super) root: ChildNodes<'on_disk>,

    /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
    pub(super) nodes_with_entry_count: u32,

    /// Number of nodes anywhere in the tree that have
    /// `.copy_source.is_some()`.
    pub(super) nodes_with_copy_source_count: u32,

    /// See on_disk::Header
    pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,

    /// How many bytes of `on_disk` are not used anymore
    pub(super) unreachable_bytes: u32,

    /// Size of the data used to first load this `DirstateMap`. Used in case
    /// we need to write some new metadata, but no new data on disk.
    pub(super) old_data_size: usize,

    /// On-disk format this map was loaded from. `empty()` defaults it to
    /// `V1`; NOTE(review): presumably v2 loading (`on_disk::read`) overrides
    /// it, and it steers the write path — confirm against `on_disk.rs`.
    pub(super) dirstate_version: DirstateVersion,
}
57 66
/// Using a plain `HgPathBuf` of the full path from the repository root as a
/// map key would also work: all paths in a given map have the same parent
/// path, so comparing full paths gives the same result as comparing base
/// names. However `HashMap` would waste time always re-hashing the same
/// string prefix.
/// NOTE(review): this presumes `WithBasename` hashes/compares by base name
/// only — confirm in `path_with_basename`.
pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
64 73
/// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
/// for on-disk nodes that don’t actually have a `Cow` to borrow.
pub(super) enum BorrowedPath<'tree, 'on_disk> {
    /// Borrowed from a node allocated in memory.
    InMemory(&'tree HgPathBuf),
    /// Borrowed directly from the on-disk bytes.
    OnDisk(&'on_disk HgPath),
}
71 80
/// The children of a tree node: either already parsed into an in-memory
/// hash map, or still in their on-disk (dirstate-v2) serialized form.
pub(super) enum ChildNodes<'on_disk> {
    InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
    OnDisk(&'on_disk [on_disk::Node]),
}
76 85
/// Shared (read-only) view of a `ChildNodes`.
pub(super) enum ChildNodesRef<'tree, 'on_disk> {
    InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
    OnDisk(&'on_disk [on_disk::Node]),
}
81 90
/// Shared reference to a single tree node, in-memory or on-disk.
pub(super) enum NodeRef<'tree, 'on_disk> {
    InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
    OnDisk(&'on_disk on_disk::Node),
}
86 95
87 96 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
88 97 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
89 98 match *self {
90 99 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
91 100 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
92 101 }
93 102 }
94 103 }
95 104
96 105 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
97 106 type Target = HgPath;
98 107
99 108 fn deref(&self) -> &HgPath {
100 109 match *self {
101 110 BorrowedPath::InMemory(in_memory) => in_memory,
102 111 BorrowedPath::OnDisk(on_disk) => on_disk,
103 112 }
104 113 }
105 114 }
106 115
107 116 impl Default for ChildNodes<'_> {
108 117 fn default() -> Self {
109 118 ChildNodes::InMemory(Default::default())
110 119 }
111 120 }
112 121
impl<'on_disk> ChildNodes<'on_disk> {
    /// Borrow these children as a read-only view, regardless of whether
    /// they live in memory or still on disk.
    pub(super) fn as_ref<'tree>(
        &'tree self,
    ) -> ChildNodesRef<'tree, 'on_disk> {
        match self {
            ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
            ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
        }
    }

    /// Whether this node has no children at all.
    pub(super) fn is_empty(&self) -> bool {
        match self {
            ChildNodes::InMemory(nodes) => nodes.is_empty(),
            ChildNodes::OnDisk(nodes) => nodes.is_empty(),
        }
    }

    /// Return a mutable reference to the in-memory map of children,
    /// converting from the on-disk representation first if needed.
    ///
    /// When that conversion happens, the on-disk bytes for these nodes
    /// become unused, so their size is added to `unreachable_bytes`.
    fn make_mut(
        &mut self,
        on_disk: &'on_disk [u8],
        unreachable_bytes: &mut u32,
    ) -> Result<
        &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
        DirstateV2ParseError,
    > {
        match self {
            ChildNodes::InMemory(nodes) => Ok(nodes),
            ChildNodes::OnDisk(nodes) => {
                // These serialized nodes will no longer be reachable once
                // replaced by the in-memory map below.
                *unreachable_bytes +=
                    std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
                let nodes = nodes
                    .iter()
                    .map(|node| {
                        Ok((
                            node.path(on_disk)?,
                            node.to_in_memory_node(on_disk)?,
                        ))
                    })
                    .collect::<Result<_, _>>()?;
                *self = ChildNodes::InMemory(nodes);
                match self {
                    ChildNodes::InMemory(nodes) => Ok(nodes),
                    // We just assigned `InMemory` above
                    ChildNodes::OnDisk(_) => unreachable!(),
                }
            }
        }
    }
}
161 170
impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
    /// Look up a child node by its base name (last path component).
    pub(super) fn get(
        &self,
        base_name: &HgPath,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
        match self {
            ChildNodesRef::InMemory(nodes) => Ok(nodes
                .get_key_value(base_name)
                .map(|(k, v)| NodeRef::InMemory(k, v))),
            ChildNodesRef::OnDisk(nodes) => {
                // On-disk nodes are sorted (see `sorted` below), so use a
                // binary search. The comparison callback cannot return a
                // parse error directly, so smuggle it out through
                // `parse_result`.
                let mut parse_result = Ok(());
                let search_result = nodes.binary_search_by(|node| {
                    match node.base_name(on_disk) {
                        Ok(node_base_name) => node_base_name.cmp(base_name),
                        Err(e) => {
                            parse_result = Err(e);
                            // Dummy comparison result, `search_result` won’t
                            // be used since `parse_result` is an error
                            std::cmp::Ordering::Equal
                        }
                    }
                });
                parse_result.map(|()| {
                    search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
                })
            }
        }
    }

    /// Iterate in undefined order
    pub(super) fn iter(
        &self,
    ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
        match self {
            ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
                nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
            ),
            ChildNodesRef::OnDisk(nodes) => {
                itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
            }
        }
    }

    /// Iterate in parallel in undefined order
    pub(super) fn par_iter(
        &self,
    ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
    {
        use rayon::prelude::*;
        match self {
            ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
                nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
            ),
            ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
                nodes.par_iter().map(NodeRef::OnDisk),
            ),
        }
    }

    /// All children, sorted by base name.
    pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
        match self {
            ChildNodesRef::InMemory(nodes) => {
                let mut vec: Vec<_> = nodes
                    .iter()
                    .map(|(k, v)| NodeRef::InMemory(k, v))
                    .collect();
                fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
                    match node {
                        NodeRef::InMemory(path, _node) => path.base_name(),
                        // `vec` was built just above from `InMemory` only
                        NodeRef::OnDisk(_) => unreachable!(),
                    }
                }
                // `sort_unstable_by_key` doesn’t allow keys borrowing from the
                // value: https://github.com/rust-lang/rust/issues/34162
                vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
                vec
            }
            ChildNodesRef::OnDisk(nodes) => {
                // Nodes on disk are already sorted
                nodes.iter().map(NodeRef::OnDisk).collect()
            }
        }
    }
}
247 256
impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
    /// Full path of this node from the repository root.
    pub(super) fn full_path(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'tree HgPath, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(path, _node) => Ok(path.full_path()),
            NodeRef::OnDisk(node) => node.full_path(on_disk),
        }
    }

    /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
    /// HgPath>` detached from `'tree`
    pub(super) fn full_path_borrowed(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(path, _node) => match path.full_path() {
                Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
                Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
            },
            NodeRef::OnDisk(node) => {
                Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
            }
        }
    }

    /// Last path component of this node.
    pub(super) fn base_name(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'tree HgPath, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(path, _node) => Ok(path.base_name()),
            NodeRef::OnDisk(node) => node.base_name(on_disk),
        }
    }

    /// Read-only view of this node's children.
    pub(super) fn children(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
            NodeRef::OnDisk(node) => {
                Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
            }
        }
    }

    /// Whether this node records a copy source (rename tracking).
    pub(super) fn has_copy_source(&self) -> bool {
        match self {
            NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
            NodeRef::OnDisk(node) => node.has_copy_source(),
        }
    }

    /// The path this file was copied/renamed from, if any.
    pub(super) fn copy_source(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => {
                Ok(node.copy_source.as_ref().map(|s| &**s))
            }
            NodeRef::OnDisk(node) => node.copy_source(on_disk),
        }
    }
    /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
    /// HgPath>` detached from `'tree`
    pub(super) fn copy_source_borrowed(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
    {
        Ok(match self {
            NodeRef::InMemory(_path, node) => {
                node.copy_source.as_ref().map(|source| match source {
                    Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
                    Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
                })
            }
            NodeRef::OnDisk(node) => node
                .copy_source(on_disk)?
                .map(|source| BorrowedPath::OnDisk(source)),
        })
    }

    /// The dirstate entry for this node, if it has one (directories
    /// created only to hold children do not).
    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => {
                Ok(node.data.as_entry().copied())
            }
            NodeRef::OnDisk(node) => node.entry(),
        }
    }

    /// Convenience accessor for the state of this node's entry, if any.
    pub(super) fn state(
        &self,
    ) -> Result<Option<EntryState>, DirstateV2ParseError> {
        Ok(self.entry()?.map(|e| e.state()))
    }

    /// The cached directory mtime, if this node holds one.
    pub(super) fn cached_directory_mtime(
        &self,
    ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
        match self {
            NodeRef::InMemory(_path, node) => Ok(match node.data {
                NodeData::CachedDirectory { mtime } => Some(mtime),
                _ => None,
            }),
            NodeRef::OnDisk(node) => node.cached_directory_mtime(),
        }
    }

    /// How many (non-inclusive) descendants of this node have an entry.
    pub(super) fn descendants_with_entry_count(&self) -> u32 {
        match self {
            NodeRef::InMemory(_path, node) => {
                node.descendants_with_entry_count
            }
            NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
        }
    }

    /// How many (non-inclusive) descendants of this node are tracked.
    pub(super) fn tracked_descendants_count(&self) -> u32 {
        match self {
            NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
            NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
        }
    }
}
381 390
/// Represents a file or a directory
#[derive(Default)]
pub(super) struct Node<'on_disk> {
    /// Entry, cached directory mtime, or nothing (see `NodeData`).
    pub(super) data: NodeData,

    /// Path this file was copied/renamed from, if any.
    pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,

    /// Child nodes, one per next path component.
    pub(super) children: ChildNodes<'on_disk>,

    /// How many (non-inclusive) descendants of this node have an entry.
    pub(super) descendants_with_entry_count: u32,

    /// How many (non-inclusive) descendants of this node have an entry whose
    /// state is "tracked".
    pub(super) tracked_descendants_count: u32,
}
398 407
/// Data carried by a tree node: an actual dirstate entry, a cached mtime
/// for a directory, or nothing.
pub(super) enum NodeData {
    Entry(DirstateEntry),
    CachedDirectory { mtime: TruncatedTimestamp },
    None,
}
404 413
405 414 impl Default for NodeData {
406 415 fn default() -> Self {
407 416 NodeData::None
408 417 }
409 418 }
410 419
411 420 impl NodeData {
412 421 fn has_entry(&self) -> bool {
413 422 match self {
414 423 NodeData::Entry(_) => true,
415 424 _ => false,
416 425 }
417 426 }
418 427
419 428 fn as_entry(&self) -> Option<&DirstateEntry> {
420 429 match self {
421 430 NodeData::Entry(entry) => Some(entry),
422 431 _ => None,
423 432 }
424 433 }
425 434 }
426 435
impl<'on_disk> DirstateMap<'on_disk> {
    /// A map with no nodes, keeping a reference to the (possibly empty)
    /// on-disk bytes. Defaults to `V1`; NOTE(review): presumably v2 loading
    /// overrides `dirstate_version` — confirm in `on_disk::read`.
    pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
        Self {
            on_disk,
            root: ChildNodes::default(),
            nodes_with_entry_count: 0,
            nodes_with_copy_source_count: 0,
            ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
            unreachable_bytes: 0,
            old_data_size: 0,
            dirstate_version: DirstateVersion::V1,
        }
    }

    /// Parse a dirstate-v2 data file (only the first `data_size` bytes are
    /// meaningful) together with its docket `metadata`.
    #[timed]
    pub fn new_v2(
        on_disk: &'on_disk [u8],
        data_size: usize,
        metadata: &[u8],
    ) -> Result<Self, DirstateError> {
        if let Some(data) = on_disk.get(..data_size) {
            Ok(on_disk::read(data, metadata)?)
        } else {
            // `data_size` is larger than the bytes we were given
            Err(DirstateV2ParseError.into())
        }
    }

    /// Parse a dirstate-v1 file, building the tree and its counters, and
    /// return the map together with the parents from the file header
    /// (`None` when the file is empty).
    #[timed]
    pub fn new_v1(
        on_disk: &'on_disk [u8],
    ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
        let mut map = Self::empty(on_disk);
        if map.on_disk.is_empty() {
            return Ok((map, None));
        }

        let parents = parse_dirstate_entries(
            map.on_disk,
            |path, entry, copy_source| {
                let tracked = entry.state().is_tracked();
                let node = Self::get_or_insert_node(
                    map.on_disk,
                    &mut map.unreachable_bytes,
                    &mut map.root,
                    path,
                    WithBasename::to_cow_borrowed,
                    // Keep ancestor counters in sync as we insert
                    |ancestor| {
                        if tracked {
                            ancestor.tracked_descendants_count += 1
                        }
                        ancestor.descendants_with_entry_count += 1
                    },
                )?;
                assert!(
                    !node.data.has_entry(),
                    "duplicate dirstate entry in read"
                );
                assert!(
                    node.copy_source.is_none(),
                    "duplicate dirstate entry in read"
                );
                node.data = NodeData::Entry(*entry);
                node.copy_source = copy_source.map(Cow::Borrowed);
                map.nodes_with_entry_count += 1;
                if copy_source.is_some() {
                    map.nodes_with_copy_source_count += 1
                }
                Ok(())
            },
        )?;
        let parents = Some(parents.clone());

        Ok((map, parents))
    }

    /// Assuming dirstate-v2 format, returns whether the next write should
    /// append to the existing data file that contains `self.on_disk` (true),
    /// or create a new data file from scratch (false).
    pub(super) fn write_should_append(&self) -> bool {
        let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
        ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
    }

    /// Walk the tree, one path component at a time, to the node at `path`.
    fn get_node<'tree>(
        &'tree self,
        path: &HgPath,
    ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
        let mut children = self.root.as_ref();
        let mut components = path.components();
        let mut component =
            components.next().expect("expected at least one components");
        loop {
            if let Some(child) = children.get(component, self.on_disk)? {
                if let Some(next_component) = components.next() {
                    component = next_component;
                    children = child.children(self.on_disk)?;
                } else {
                    return Ok(Some(child));
                }
            } else {
                return Ok(None);
            }
        }
    }

    /// Returns a mutable reference to the node at `path` if it exists
    ///
    /// This takes `root` instead of `&mut self` so that callers can mutate
    /// other fields while the returned borrow is still valid
    fn get_node_mut<'tree>(
        on_disk: &'on_disk [u8],
        unreachable_bytes: &mut u32,
        root: &'tree mut ChildNodes<'on_disk>,
        path: &HgPath,
    ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
        let mut children = root;
        let mut components = path.components();
        let mut component =
            components.next().expect("expected at least one components");
        loop {
            if let Some(child) = children
                .make_mut(on_disk, unreachable_bytes)?
                .get_mut(component)
            {
                if let Some(next_component) = components.next() {
                    component = next_component;
                    children = &mut child.children;
                } else {
                    return Ok(Some(child));
                }
            } else {
                return Ok(None);
            }
        }
    }

    /// Get the node at `path`, creating it (and any missing ancestors) if
    /// it does not exist yet. Does not touch any counters.
    pub(super) fn get_or_insert<'tree, 'path>(
        &'tree mut self,
        path: &HgPath,
    ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
        Self::get_or_insert_node(
            self.on_disk,
            &mut self.unreachable_bytes,
            &mut self.root,
            path,
            WithBasename::to_cow_owned,
            |_| {},
        )
    }

    /// Get the node at `path`, creating it and any missing ancestors if
    /// needed. `each_ancestor` is called for every node on the way down
    /// except the final one, letting callers maintain descendant counters.
    fn get_or_insert_node<'tree, 'path>(
        on_disk: &'on_disk [u8],
        unreachable_bytes: &mut u32,
        root: &'tree mut ChildNodes<'on_disk>,
        path: &'path HgPath,
        to_cow: impl Fn(
            WithBasename<&'path HgPath>,
        ) -> WithBasename<Cow<'on_disk, HgPath>>,
        mut each_ancestor: impl FnMut(&mut Node),
    ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
        let mut child_nodes = root;
        let mut inclusive_ancestor_paths =
            WithBasename::inclusive_ancestors_of(path);
        let mut ancestor_path = inclusive_ancestor_paths
            .next()
            .expect("expected at least one inclusive ancestor");
        loop {
            // TODO: can we avoid allocating an owned key in cases where the
            // map already contains that key, without introducing double
            // lookup?
            let child_node = child_nodes
                .make_mut(on_disk, unreachable_bytes)?
                .entry(to_cow(ancestor_path))
                .or_default();
            if let Some(next) = inclusive_ancestor_paths.next() {
                each_ancestor(child_node);
                ancestor_path = next;
                child_nodes = &mut child_node.children;
            } else {
                return Ok(child_node);
            }
        }
    }

    /// Set the entry for `path` to `new_entry`, keeping the per-ancestor
    /// and map-wide counters in sync. `old_state` must be the state
    /// previously recorded for `path` (`None` if there was no entry).
    fn add_or_remove_file(
        &mut self,
        path: &HgPath,
        old_state: Option<EntryState>,
        new_entry: DirstateEntry,
    ) -> Result<(), DirstateV2ParseError> {
        let had_entry = old_state.is_some();
        let was_tracked = old_state.map_or(false, |s| s.is_tracked());
        let tracked_count_increment =
            match (was_tracked, new_entry.state().is_tracked()) {
                (false, true) => 1,
                (true, false) => -1,
                _ => 0,
            };

        let node = Self::get_or_insert_node(
            self.on_disk,
            &mut self.unreachable_bytes,
            &mut self.root,
            path,
            WithBasename::to_cow_owned,
            |ancestor| {
                if !had_entry {
                    ancestor.descendants_with_entry_count += 1;
                }

                // We can’t use `+= increment` because the counter is unsigned,
                // and we want debug builds to detect accidental underflow
                // through zero
                match tracked_count_increment {
                    1 => ancestor.tracked_descendants_count += 1,
                    -1 => ancestor.tracked_descendants_count -= 1,
                    _ => {}
                }
            },
        )?;
        if !had_entry {
            self.nodes_with_entry_count += 1
        }
        node.data = NodeData::Entry(new_entry);
        Ok(())
    }

    /// Iterate over every node of the tree, children before parents.
    fn iter_nodes<'tree>(
        &'tree self,
    ) -> impl Iterator<
        Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
    > + 'tree {
        // Depth first tree traversal.
        //
        // If we could afford internal iteration and recursion,
        // this would look like:
        //
        // ```
        // fn traverse_children(
        //     children: &ChildNodes,
        //     each: &mut impl FnMut(&Node),
        // ) {
        //     for child in children.values() {
        //         traverse_children(&child.children, each);
        //         each(child);
        //     }
        // }
        // ```
        //
        // However we want an external iterator and therefore can’t use the
        // call stack. Use an explicit stack instead:
        let mut stack = Vec::new();
        let mut iter = self.root.as_ref().iter();
        std::iter::from_fn(move || {
            while let Some(child_node) = iter.next() {
                let children = match child_node.children(self.on_disk) {
                    Ok(children) => children,
                    Err(error) => return Some(Err(error)),
                };
                // Pseudo-recursion
                let new_iter = children.iter();
                let old_iter = std::mem::replace(&mut iter, new_iter);
                stack.push((child_node, old_iter));
            }
            // Found the end of a `children.iter()` iterator.
            if let Some((child_node, next_iter)) = stack.pop() {
                // "Return" from pseudo-recursion by restoring state from the
                // explicit stack
                iter = next_iter;

                Some(Ok(child_node))
            } else {
                // Reached the bottom of the stack, we’re done
                None
            }
        })
    }

    /// Record that `path`'s bytes (when borrowed from `on_disk`) are no
    /// longer reachable.
    fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
        if let Cow::Borrowed(path) = path {
            *unreachable_bytes += path.len() as u32
        }
    }
}
710 720
/// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
///
/// The callback is only called for incoming `Ok` values. Errors are passed
/// through as-is. In order to let it use the `?` operator the callback is
/// expected to return a `Result` of `Option`, instead of an `Option` of
/// `Result`.
fn filter_map_results<'a, I, F, A, B, E>(
    iter: I,
    f: F,
) -> impl Iterator<Item = Result<B, E>> + 'a
where
    I: Iterator<Item = Result<A, E>> + 'a,
    F: Fn(A) -> Result<Option<B>, E> + 'a,
{
    // `and_then` forwards `Err` items unchanged and applies `f` to `Ok`
    // values; `transpose` then drops the `Ok(None)` items from the stream.
    iter.filter_map(move |item| item.and_then(|value| f(value)).transpose())
}
730 740
731 741 impl OwningDirstateMap {
732 742 pub fn clear(&mut self) {
733 743 self.with_dmap_mut(|map| {
734 744 map.root = Default::default();
735 745 map.nodes_with_entry_count = 0;
736 746 map.nodes_with_copy_source_count = 0;
737 747 });
738 748 }
739 749
740 750 pub fn set_entry(
741 751 &mut self,
742 752 filename: &HgPath,
743 753 entry: DirstateEntry,
744 754 ) -> Result<(), DirstateV2ParseError> {
745 755 self.with_dmap_mut(|map| {
746 756 map.get_or_insert(&filename)?.data = NodeData::Entry(entry);
747 757 Ok(())
748 758 })
749 759 }
750 760
751 761 pub fn add_file(
752 762 &mut self,
753 763 filename: &HgPath,
754 764 entry: DirstateEntry,
755 765 ) -> Result<(), DirstateError> {
756 766 let old_state = self.get(filename)?.map(|e| e.state());
757 767 self.with_dmap_mut(|map| {
758 768 Ok(map.add_or_remove_file(filename, old_state, entry)?)
759 769 })
760 770 }
761 771
    /// Record `filename` as removed. During a merge, the previous state may
    /// be encoded in the removed entry's `size` field so it can be restored
    /// later; outside that case the copy source is dropped too.
    pub fn remove_file(
        &mut self,
        filename: &HgPath,
        in_merge: bool,
    ) -> Result<(), DirstateError> {
        let old_entry_opt = self.get(filename)?;
        let old_state = old_entry_opt.map(|e| e.state());
        let mut size = 0;
        if in_merge {
            // XXX we should not be able to have 'm' state and 'FROM_P2' if not
            // during a merge. So I (marmoute) am not sure we need the
            // conditional at all. Adding double checking this with assert
            // would be nice.
            if let Some(old_entry) = old_entry_opt {
                // backup the previous state
                if old_entry.state() == EntryState::Merged {
                    size = SIZE_NON_NORMAL;
                } else if old_entry.state() == EntryState::Normal
                    && old_entry.size() == SIZE_FROM_OTHER_PARENT
                {
                    // other parent
                    size = SIZE_FROM_OTHER_PARENT;
                }
            }
        }
        // No merge information to preserve: the copy source is dropped
        if size == 0 {
            self.copy_map_remove(filename)?;
        }
        self.with_dmap_mut(|map| {
            let entry = DirstateEntry::new_removed(size);
            Ok(map.add_or_remove_file(filename, old_state, entry)?)
        })
    }
795 805
    /// Remove the entry and copy source recorded for `filename`, pruning
    /// tree nodes that become empty and keeping all counters in sync.
    pub fn drop_entry_and_copy_source(
        &mut self,
        filename: &HgPath,
    ) -> Result<(), DirstateError> {
        let was_tracked = self
            .get(filename)?
            .map_or(false, |e| e.state().is_tracked());
        // What the recursion below found about the leaf node at `filename`.
        struct Dropped {
            was_tracked: bool,
            had_entry: bool,
            had_copy_source: bool,
        }

        /// If this returns `Ok(Some((dropped, removed)))`, then
        ///
        /// * `dropped` is about the leaf node that was at `filename`
        /// * `removed` is whether this particular level of recursion just
        ///   removed a node in `nodes`.
        fn recur<'on_disk>(
            on_disk: &'on_disk [u8],
            unreachable_bytes: &mut u32,
            nodes: &mut ChildNodes<'on_disk>,
            path: &HgPath,
        ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
            let (first_path_component, rest_of_path) =
                path.split_first_component();
            let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
            let node = if let Some(node) = nodes.get_mut(first_path_component)
            {
                node
            } else {
                // `filename` is not in the map at all
                return Ok(None);
            };
            let dropped;
            if let Some(rest) = rest_of_path {
                // Not at the leaf yet: recurse into the child directory,
                // then update this node's counters from what was dropped.
                if let Some((d, removed)) = recur(
                    on_disk,
                    unreachable_bytes,
                    &mut node.children,
                    rest,
                )? {
                    dropped = d;
                    if dropped.had_entry {
                        node.descendants_with_entry_count = node
                            .descendants_with_entry_count
                            .checked_sub(1)
                            .expect(
                                "descendants_with_entry_count should be >= 0",
                            );
                    }
                    if dropped.was_tracked {
                        node.tracked_descendants_count = node
                            .tracked_descendants_count
                            .checked_sub(1)
                            .expect(
                                "tracked_descendants_count should be >= 0",
                            );
                    }

                    // Directory caches must be invalidated when removing a
                    // child node
                    if removed {
                        if let NodeData::CachedDirectory { .. } = &node.data {
                            node.data = NodeData::None
                        }
                    }
                } else {
                    return Ok(None);
                }
            } else {
                // At the leaf: clear its entry and copy source.
                let entry = node.data.as_entry();
                let was_tracked = entry.map_or(false, |entry| entry.tracked());
                let had_entry = entry.is_some();
                if had_entry {
                    node.data = NodeData::None
                }
                let mut had_copy_source = false;
                if let Some(source) = &node.copy_source {
                    DirstateMap::count_dropped_path(unreachable_bytes, source);
                    had_copy_source = true;
                    node.copy_source = None
                }
                dropped = Dropped {
                    was_tracked,
                    had_entry,
                    had_copy_source,
                };
            }
            // After recursion, for both leaf (rest_of_path is None) nodes and
            // parent nodes, remove a node if it just became empty.
            let remove = !node.data.has_entry()
                && node.copy_source.is_none()
                && node.children.is_empty();
            if remove {
                let (key, _) =
                    nodes.remove_entry(first_path_component).unwrap();
                DirstateMap::count_dropped_path(
                    unreachable_bytes,
                    key.full_path(),
                )
            }
            Ok(Some((dropped, remove)))
        }

        self.with_dmap_mut(|map| {
            if let Some((dropped, _removed)) = recur(
                map.on_disk,
                &mut map.unreachable_bytes,
                &mut map.root,
                filename,
            )? {
                if dropped.had_entry {
                    map.nodes_with_entry_count = map
                        .nodes_with_entry_count
                        .checked_sub(1)
                        .expect("nodes_with_entry_count should be >= 0");
                }
                if dropped.had_copy_source {
                    map.nodes_with_copy_source_count = map
                        .nodes_with_copy_source_count
                        .checked_sub(1)
                        .expect("nodes_with_copy_source_count should be >= 0");
                }
            } else {
                // Nothing was dropped, so nothing should have been tracked
                debug_assert!(!was_tracked);
            }
            Ok(())
        })
    }
925 935
926 936 pub fn has_tracked_dir(
927 937 &mut self,
928 938 directory: &HgPath,
929 939 ) -> Result<bool, DirstateError> {
930 940 self.with_dmap_mut(|map| {
931 941 if let Some(node) = map.get_node(directory)? {
932 942 // A node without a `DirstateEntry` was created to hold child
933 943 // nodes, and is therefore a directory.
934 944 let state = node.state()?;
935 945 Ok(state.is_none() && node.tracked_descendants_count() > 0)
936 946 } else {
937 947 Ok(false)
938 948 }
939 949 })
940 950 }
941 951
942 952 pub fn has_dir(
943 953 &mut self,
944 954 directory: &HgPath,
945 955 ) -> Result<bool, DirstateError> {
946 956 self.with_dmap_mut(|map| {
947 957 if let Some(node) = map.get_node(directory)? {
948 958 // A node without a `DirstateEntry` was created to hold child
949 959 // nodes, and is therefore a directory.
950 960 let state = node.state()?;
951 961 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
952 962 } else {
953 963 Ok(false)
954 964 }
955 965 })
956 966 }
957 967
    /// Serialize the map in dirstate-v1 format, with `parents` as the
    /// header. Only nodes that actually have an entry are written.
    #[timed]
    pub fn pack_v1(
        &self,
        parents: DirstateParents,
    ) -> Result<Vec<u8>, DirstateError> {
        let map = self.get_map();
        // Optimization (to be measured?): pre-compute size to avoid `Vec`
        // reallocations
        let mut size = parents.as_bytes().len();
        for node in map.iter_nodes() {
            let node = node?;
            if node.entry()?.is_some() {
                size += packed_entry_size(
                    node.full_path(map.on_disk)?,
                    node.copy_source(map.on_disk)?,
                );
            }
        }

        let mut packed = Vec::with_capacity(size);
        packed.extend(parents.as_bytes());

        // Second pass: actually serialize each entry
        for node in map.iter_nodes() {
            let node = node?;
            if let Some(entry) = node.entry()? {
                pack_entry(
                    node.full_path(map.on_disk)?,
                    &entry,
                    node.copy_source(map.on_disk)?,
                    &mut packed,
                );
            }
        }
        Ok(packed)
    }
993 1003
994 1004 /// Returns new data and metadata together with whether that data should be
995 1005 /// appended to the existing data file whose content is at
996 1006 /// `map.on_disk` (true), instead of written to a new data file
997 1007 /// (false), and the previous size of data on disk.
998 1008 #[timed]
999 1009 pub fn pack_v2(
1000 1010 &self,
1001 1011 can_append: bool,
1002 1012 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1003 1013 {
1004 1014 let map = self.get_map();
1005 1015 on_disk::write(map, can_append)
1006 1016 }
1007 1017
1008 1018 /// `callback` allows the caller to process and do something with the
1009 1019 /// results of the status. This is needed to do so efficiently (i.e.
1010 1020 /// without cloning the `DirstateStatus` object with its paths) because
1011 1021 /// we need to borrow from `Self`.
1012 1022 pub fn with_status<R>(
1013 1023 &mut self,
1014 1024 matcher: &(dyn Matcher + Sync),
1015 1025 root_dir: PathBuf,
1016 1026 ignore_files: Vec<PathBuf>,
1017 1027 options: StatusOptions,
1018 1028 callback: impl for<'r> FnOnce(
1019 1029 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1020 1030 ) -> R,
1021 1031 ) -> R {
1022 1032 self.with_dmap_mut(|map| {
1023 1033 callback(super::status::status(
1024 1034 map,
1025 1035 matcher,
1026 1036 root_dir,
1027 1037 ignore_files,
1028 1038 options,
1029 1039 ))
1030 1040 })
1031 1041 }
1032 1042
1033 1043 pub fn copy_map_len(&self) -> usize {
1034 1044 let map = self.get_map();
1035 1045 map.nodes_with_copy_source_count as usize
1036 1046 }
1037 1047
1038 1048 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1039 1049 let map = self.get_map();
1040 1050 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1041 1051 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1042 1052 Some((node.full_path(map.on_disk)?, source))
1043 1053 } else {
1044 1054 None
1045 1055 })
1046 1056 }))
1047 1057 }
1048 1058
1049 1059 pub fn copy_map_contains_key(
1050 1060 &self,
1051 1061 key: &HgPath,
1052 1062 ) -> Result<bool, DirstateV2ParseError> {
1053 1063 let map = self.get_map();
1054 1064 Ok(if let Some(node) = map.get_node(key)? {
1055 1065 node.has_copy_source()
1056 1066 } else {
1057 1067 false
1058 1068 })
1059 1069 }
1060 1070
1061 1071 pub fn copy_map_get(
1062 1072 &self,
1063 1073 key: &HgPath,
1064 1074 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1065 1075 let map = self.get_map();
1066 1076 if let Some(node) = map.get_node(key)? {
1067 1077 if let Some(source) = node.copy_source(map.on_disk)? {
1068 1078 return Ok(Some(source));
1069 1079 }
1070 1080 }
1071 1081 Ok(None)
1072 1082 }
1073 1083
1074 1084 pub fn copy_map_remove(
1075 1085 &mut self,
1076 1086 key: &HgPath,
1077 1087 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1078 1088 self.with_dmap_mut(|map| {
1079 1089 let count = &mut map.nodes_with_copy_source_count;
1080 1090 let unreachable_bytes = &mut map.unreachable_bytes;
1081 1091 Ok(DirstateMap::get_node_mut(
1082 1092 map.on_disk,
1083 1093 unreachable_bytes,
1084 1094 &mut map.root,
1085 1095 key,
1086 1096 )?
1087 1097 .and_then(|node| {
1088 1098 if let Some(source) = &node.copy_source {
1089 1099 *count -= 1;
1090 1100 DirstateMap::count_dropped_path(unreachable_bytes, source);
1091 1101 }
1092 1102 node.copy_source.take().map(Cow::into_owned)
1093 1103 }))
1094 1104 })
1095 1105 }
1096 1106
1097 1107 pub fn copy_map_insert(
1098 1108 &mut self,
1099 1109 key: HgPathBuf,
1100 1110 value: HgPathBuf,
1101 1111 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1102 1112 self.with_dmap_mut(|map| {
1103 1113 let node = DirstateMap::get_or_insert_node(
1104 1114 map.on_disk,
1105 1115 &mut map.unreachable_bytes,
1106 1116 &mut map.root,
1107 1117 &key,
1108 1118 WithBasename::to_cow_owned,
1109 1119 |_ancestor| {},
1110 1120 )?;
1111 1121 if node.copy_source.is_none() {
1112 1122 map.nodes_with_copy_source_count += 1
1113 1123 }
1114 1124 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
1115 1125 })
1116 1126 }
1117 1127
1118 1128 pub fn len(&self) -> usize {
1119 1129 let map = self.get_map();
1120 1130 map.nodes_with_entry_count as usize
1121 1131 }
1122 1132
1123 1133 pub fn contains_key(
1124 1134 &self,
1125 1135 key: &HgPath,
1126 1136 ) -> Result<bool, DirstateV2ParseError> {
1127 1137 Ok(self.get(key)?.is_some())
1128 1138 }
1129 1139
1130 1140 pub fn get(
1131 1141 &self,
1132 1142 key: &HgPath,
1133 1143 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1134 1144 let map = self.get_map();
1135 1145 Ok(if let Some(node) = map.get_node(key)? {
1136 1146 node.entry()?
1137 1147 } else {
1138 1148 None
1139 1149 })
1140 1150 }
1141 1151
1142 1152 pub fn iter(&self) -> StateMapIter<'_> {
1143 1153 let map = self.get_map();
1144 1154 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1145 1155 Ok(if let Some(entry) = node.entry()? {
1146 1156 Some((node.full_path(map.on_disk)?, entry))
1147 1157 } else {
1148 1158 None
1149 1159 })
1150 1160 }))
1151 1161 }
1152 1162
1153 1163 pub fn iter_tracked_dirs(
1154 1164 &mut self,
1155 1165 ) -> Result<
1156 1166 Box<
1157 1167 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1158 1168 + Send
1159 1169 + '_,
1160 1170 >,
1161 1171 DirstateError,
1162 1172 > {
1163 1173 let map = self.get_map();
1164 1174 let on_disk = map.on_disk;
1165 1175 Ok(Box::new(filter_map_results(
1166 1176 map.iter_nodes(),
1167 1177 move |node| {
1168 1178 Ok(if node.tracked_descendants_count() > 0 {
1169 1179 Some(node.full_path(on_disk)?)
1170 1180 } else {
1171 1181 None
1172 1182 })
1173 1183 },
1174 1184 )))
1175 1185 }
1176 1186
1177 1187 pub fn debug_iter(
1178 1188 &self,
1179 1189 all: bool,
1180 1190 ) -> Box<
1181 1191 dyn Iterator<
1182 1192 Item = Result<
1183 1193 (&HgPath, (u8, i32, i32, i32)),
1184 1194 DirstateV2ParseError,
1185 1195 >,
1186 1196 > + Send
1187 1197 + '_,
1188 1198 > {
1189 1199 let map = self.get_map();
1190 1200 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1191 1201 let debug_tuple = if let Some(entry) = node.entry()? {
1192 1202 entry.debug_tuple()
1193 1203 } else if !all {
1194 1204 return Ok(None);
1195 1205 } else if let Some(mtime) = node.cached_directory_mtime()? {
1196 1206 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1197 1207 } else {
1198 1208 (b' ', 0, -1, -1)
1199 1209 };
1200 1210 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1201 1211 }))
1202 1212 }
1203 1213 }
@@ -1,849 +1,853 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::TruncatedTimestamp;
6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
6 7 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 8 use crate::dirstate_tree::path_with_basename::WithBasename;
8 9 use crate::errors::HgError;
9 10 use crate::utils::hg_path::HgPath;
10 11 use crate::DirstateEntry;
11 12 use crate::DirstateError;
12 13 use crate::DirstateParents;
13 14 use bitflags::bitflags;
14 15 use bytes_cast::unaligned::{U16Be, U32Be};
15 16 use bytes_cast::BytesCast;
16 17 use format_bytes::format_bytes;
17 18 use rand::Rng;
18 19 use std::borrow::Cow;
19 20 use std::convert::{TryFrom, TryInto};
20 21 use std::fmt::Write;
21 22
22 23 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
23 24 /// This a redundant sanity check more than an actual "magic number" since
24 25 /// `.hg/requires` already governs which format should be used.
25 26 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
26 27
27 28 /// Keep space for 256-bit hashes
28 29 const STORED_NODE_ID_BYTES: usize = 32;
29 30
30 31 /// … even though only 160 bits are used for now, with SHA-1
31 32 const USED_NODE_ID_BYTES: usize = 20;
32 33
33 34 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
34 35 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
35 36
36 37 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
37 38 const TREE_METADATA_SIZE: usize = 44;
38 39 const NODE_SIZE: usize = 44;
39 40
40 41 /// Make sure that size-affecting changes are made knowingly
41 42 #[allow(unused)]
42 43 fn static_assert_size_of() {
43 44 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
44 45 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
45 46 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
46 47 }
47 48
48 49 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
49 50 #[derive(BytesCast)]
50 51 #[repr(C)]
51 52 struct DocketHeader {
52 53 marker: [u8; V2_FORMAT_MARKER.len()],
53 54 parent_1: [u8; STORED_NODE_ID_BYTES],
54 55 parent_2: [u8; STORED_NODE_ID_BYTES],
55 56
56 57 metadata: TreeMetadata,
57 58
58 59 /// Counted in bytes
59 60 data_size: Size,
60 61
61 62 uuid_size: u8,
62 63 }
63 64
64 65 pub struct Docket<'on_disk> {
65 66 header: &'on_disk DocketHeader,
66 67 pub uuid: &'on_disk [u8],
67 68 }
68 69
69 70 /// Fields are documented in the *Tree metadata in the docket file*
70 71 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
71 72 #[derive(BytesCast)]
72 73 #[repr(C)]
73 74 pub struct TreeMetadata {
74 75 root_nodes: ChildNodes,
75 76 nodes_with_entry_count: Size,
76 77 nodes_with_copy_source_count: Size,
77 78 unreachable_bytes: Size,
78 79 unused: [u8; 4],
79 80
80 81 /// See *Optional hash of ignore patterns* section of
81 82 /// `mercurial/helptext/internals/dirstate-v2.txt`
82 83 ignore_patterns_hash: IgnorePatternsHash,
83 84 }
84 85
85 86 /// Fields are documented in the *The data file format*
86 87 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
87 88 #[derive(BytesCast)]
88 89 #[repr(C)]
89 90 pub(super) struct Node {
90 91 full_path: PathSlice,
91 92
92 93 /// In bytes from `self.full_path.start`
93 94 base_name_start: PathSize,
94 95
95 96 copy_source: OptPathSlice,
96 97 children: ChildNodes,
97 98 pub(super) descendants_with_entry_count: Size,
98 99 pub(super) tracked_descendants_count: Size,
99 100 flags: U16Be,
100 101 size: U32Be,
101 102 mtime: PackedTruncatedTimestamp,
102 103 }
103 104
104 105 bitflags! {
105 106 #[repr(C)]
106 107 struct Flags: u16 {
107 108 const WDIR_TRACKED = 1 << 0;
108 109 const P1_TRACKED = 1 << 1;
109 110 const P2_INFO = 1 << 2;
110 111 const MODE_EXEC_PERM = 1 << 3;
111 112 const MODE_IS_SYMLINK = 1 << 4;
112 113 const HAS_FALLBACK_EXEC = 1 << 5;
113 114 const FALLBACK_EXEC = 1 << 6;
114 115 const HAS_FALLBACK_SYMLINK = 1 << 7;
115 116 const FALLBACK_SYMLINK = 1 << 8;
116 117 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
117 118 const HAS_MODE_AND_SIZE = 1 <<10;
118 119 const HAS_MTIME = 1 <<11;
119 120 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
120 121 const DIRECTORY = 1 <<13;
121 122 const ALL_UNKNOWN_RECORDED = 1 <<14;
122 123 const ALL_IGNORED_RECORDED = 1 <<15;
123 124 }
124 125 }
125 126
126 127 /// Duration since the Unix epoch
127 128 #[derive(BytesCast, Copy, Clone)]
128 129 #[repr(C)]
129 130 struct PackedTruncatedTimestamp {
130 131 truncated_seconds: U32Be,
131 132 nanoseconds: U32Be,
132 133 }
133 134
134 135 /// Counted in bytes from the start of the file
135 136 ///
136 137 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
137 138 type Offset = U32Be;
138 139
139 140 /// Counted in number of items
140 141 ///
141 142 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
142 143 type Size = U32Be;
143 144
144 145 /// Counted in bytes
145 146 ///
146 147 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
147 148 type PathSize = U16Be;
148 149
149 150 /// A contiguous sequence of `len` times `Node`, representing the child nodes
150 151 /// of either some other node or of the repository root.
151 152 ///
152 153 /// Always sorted by ascending `full_path`, to allow binary search.
153 154 /// Since nodes with the same parent nodes also have the same parent path,
154 155 /// only the `base_name`s need to be compared during binary search.
155 156 #[derive(BytesCast, Copy, Clone)]
156 157 #[repr(C)]
157 158 struct ChildNodes {
158 159 start: Offset,
159 160 len: Size,
160 161 }
161 162
162 163 /// A `HgPath` of `len` bytes
163 164 #[derive(BytesCast, Copy, Clone)]
164 165 #[repr(C)]
165 166 struct PathSlice {
166 167 start: Offset,
167 168 len: PathSize,
168 169 }
169 170
170 171 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
171 172 type OptPathSlice = PathSlice;
172 173
173 174 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
174 175 ///
175 176 /// This should only happen if Mercurial is buggy or a repository is corrupted.
176 177 #[derive(Debug)]
177 178 pub struct DirstateV2ParseError;
178 179
179 180 impl From<DirstateV2ParseError> for HgError {
180 181 fn from(_: DirstateV2ParseError) -> Self {
181 182 HgError::corrupted("dirstate-v2 parse error")
182 183 }
183 184 }
184 185
185 186 impl From<DirstateV2ParseError> for crate::DirstateError {
186 187 fn from(error: DirstateV2ParseError) -> Self {
187 188 HgError::from(error).into()
188 189 }
189 190 }
190 191
191 192 impl TreeMetadata {
192 193 pub fn as_bytes(&self) -> &[u8] {
193 194 BytesCast::as_bytes(self)
194 195 }
195 196 }
196 197
197 198 impl<'on_disk> Docket<'on_disk> {
198 199 /// Generate the identifier for a new data file
199 200 ///
200 201 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
201 202 /// See `mercurial/revlogutils/docket.py`
202 203 pub fn new_uid() -> String {
203 204 const ID_LENGTH: usize = 8;
204 205 let mut id = String::with_capacity(ID_LENGTH);
205 206 let mut rng = rand::thread_rng();
206 207 for _ in 0..ID_LENGTH {
207 208 // One random hexadecimal digit.
208 209 // `unwrap` never panics because `impl Write for String`
209 210 // never returns an error.
210 211 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
211 212 }
212 213 id
213 214 }
214 215
215 216 pub fn serialize(
216 217 parents: DirstateParents,
217 218 tree_metadata: TreeMetadata,
218 219 data_size: u64,
219 220 uuid: &[u8],
220 221 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
221 222 let header = DocketHeader {
222 223 marker: *V2_FORMAT_MARKER,
223 224 parent_1: parents.p1.pad_to_256_bits(),
224 225 parent_2: parents.p2.pad_to_256_bits(),
225 226 metadata: tree_metadata,
226 227 data_size: u32::try_from(data_size)?.into(),
227 228 uuid_size: uuid.len().try_into()?,
228 229 };
229 230 let header = header.as_bytes();
230 231 let mut docket = Vec::with_capacity(header.len() + uuid.len());
231 232 docket.extend_from_slice(header);
232 233 docket.extend_from_slice(uuid);
233 234 Ok(docket)
234 235 }
235 236
236 237 pub fn parents(&self) -> DirstateParents {
237 238 use crate::Node;
238 239 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
239 240 .unwrap()
240 241 .clone();
241 242 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
242 243 .unwrap()
243 244 .clone();
244 245 DirstateParents { p1, p2 }
245 246 }
246 247
247 248 pub fn tree_metadata(&self) -> &[u8] {
248 249 self.header.metadata.as_bytes()
249 250 }
250 251
251 252 pub fn data_size(&self) -> usize {
252 253 // This `unwrap` could only panic on a 16-bit CPU
253 254 self.header.data_size.get().try_into().unwrap()
254 255 }
255 256
256 257 pub fn data_filename(&self) -> String {
257 258 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
258 259 }
259 260 }
260 261
261 262 pub fn read_docket(
262 263 on_disk: &[u8],
263 264 ) -> Result<Docket<'_>, DirstateV2ParseError> {
264 265 let (header, uuid) =
265 266 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
266 267 let uuid_size = header.uuid_size as usize;
267 268 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
268 269 Ok(Docket { header, uuid })
269 270 } else {
270 271 Err(DirstateV2ParseError)
271 272 }
272 273 }
273 274
274 275 pub(super) fn read<'on_disk>(
275 276 on_disk: &'on_disk [u8],
276 277 metadata: &[u8],
277 278 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
278 279 if on_disk.is_empty() {
279 return Ok(DirstateMap::empty(on_disk));
280 let mut map = DirstateMap::empty(on_disk);
281 map.dirstate_version = DirstateVersion::V2;
282 return Ok(map);
280 283 }
281 284 let (meta, _) = TreeMetadata::from_bytes(metadata)
282 285 .map_err(|_| DirstateV2ParseError)?;
283 286 let dirstate_map = DirstateMap {
284 287 on_disk,
285 288 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
286 289 on_disk,
287 290 meta.root_nodes,
288 291 )?),
289 292 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
290 293 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
291 294 ignore_patterns_hash: meta.ignore_patterns_hash,
292 295 unreachable_bytes: meta.unreachable_bytes.get(),
293 296 old_data_size: on_disk.len(),
297 dirstate_version: DirstateVersion::V2,
294 298 };
295 299 Ok(dirstate_map)
296 300 }
297 301
298 302 impl Node {
299 303 pub(super) fn full_path<'on_disk>(
300 304 &self,
301 305 on_disk: &'on_disk [u8],
302 306 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
303 307 read_hg_path(on_disk, self.full_path)
304 308 }
305 309
306 310 pub(super) fn base_name_start<'on_disk>(
307 311 &self,
308 312 ) -> Result<usize, DirstateV2ParseError> {
309 313 let start = self.base_name_start.get();
310 314 if start < self.full_path.len.get() {
311 315 let start = usize::try_from(start)
312 316 // u32 -> usize, could only panic on a 16-bit CPU
313 317 .expect("dirstate-v2 base_name_start out of bounds");
314 318 Ok(start)
315 319 } else {
316 320 Err(DirstateV2ParseError)
317 321 }
318 322 }
319 323
320 324 pub(super) fn base_name<'on_disk>(
321 325 &self,
322 326 on_disk: &'on_disk [u8],
323 327 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
324 328 let full_path = self.full_path(on_disk)?;
325 329 let base_name_start = self.base_name_start()?;
326 330 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
327 331 }
328 332
329 333 pub(super) fn path<'on_disk>(
330 334 &self,
331 335 on_disk: &'on_disk [u8],
332 336 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
333 337 Ok(WithBasename::from_raw_parts(
334 338 Cow::Borrowed(self.full_path(on_disk)?),
335 339 self.base_name_start()?,
336 340 ))
337 341 }
338 342
339 343 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
340 344 self.copy_source.start.get() != 0
341 345 }
342 346
343 347 pub(super) fn copy_source<'on_disk>(
344 348 &self,
345 349 on_disk: &'on_disk [u8],
346 350 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
347 351 Ok(if self.has_copy_source() {
348 352 Some(read_hg_path(on_disk, self.copy_source)?)
349 353 } else {
350 354 None
351 355 })
352 356 }
353 357
354 358 fn flags(&self) -> Flags {
355 359 Flags::from_bits_truncate(self.flags.get())
356 360 }
357 361
358 362 fn has_entry(&self) -> bool {
359 363 self.flags().intersects(
360 364 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
361 365 )
362 366 }
363 367
364 368 pub(super) fn node_data(
365 369 &self,
366 370 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
367 371 if self.has_entry() {
368 372 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
369 373 } else if let Some(mtime) = self.cached_directory_mtime()? {
370 374 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
371 375 } else {
372 376 Ok(dirstate_map::NodeData::None)
373 377 }
374 378 }
375 379
376 380 pub(super) fn cached_directory_mtime(
377 381 &self,
378 382 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
379 383 // For now we do not have code to handle the absence of
380 384 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
381 385 // unset.
382 386 if self.flags().contains(Flags::DIRECTORY)
383 387 && self.flags().contains(Flags::HAS_MTIME)
384 388 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
385 389 {
386 390 Ok(Some(self.mtime()?))
387 391 } else {
388 392 Ok(None)
389 393 }
390 394 }
391 395
392 396 fn synthesize_unix_mode(&self) -> u32 {
393 397 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
394 398 libc::S_IFLNK
395 399 } else {
396 400 libc::S_IFREG
397 401 };
398 402 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
399 403 0o755
400 404 } else {
401 405 0o644
402 406 };
403 407 (file_type | permisions).into()
404 408 }
405 409
406 410 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
407 411 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
408 412 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
409 413 m.second_ambiguous = true;
410 414 }
411 415 Ok(m)
412 416 }
413 417
414 418 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
415 419 // TODO: convert through raw bits instead?
416 420 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
417 421 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
418 422 let p2_info = self.flags().contains(Flags::P2_INFO);
419 423 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
420 424 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
421 425 {
422 426 Some((self.synthesize_unix_mode(), self.size.into()))
423 427 } else {
424 428 None
425 429 };
426 430 let mtime = if self.flags().contains(Flags::HAS_MTIME)
427 431 && !self.flags().contains(Flags::DIRECTORY)
428 432 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
429 433 {
430 434 Some(self.mtime()?)
431 435 } else {
432 436 None
433 437 };
434 438 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
435 439 {
436 440 Some(self.flags().contains(Flags::FALLBACK_EXEC))
437 441 } else {
438 442 None
439 443 };
440 444 let fallback_symlink =
441 445 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
442 446 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
443 447 } else {
444 448 None
445 449 };
446 450 Ok(DirstateEntry::from_v2_data(
447 451 wdir_tracked,
448 452 p1_tracked,
449 453 p2_info,
450 454 mode_size,
451 455 mtime,
452 456 fallback_exec,
453 457 fallback_symlink,
454 458 ))
455 459 }
456 460
457 461 pub(super) fn entry(
458 462 &self,
459 463 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
460 464 if self.has_entry() {
461 465 Ok(Some(self.assume_entry()?))
462 466 } else {
463 467 Ok(None)
464 468 }
465 469 }
466 470
467 471 pub(super) fn children<'on_disk>(
468 472 &self,
469 473 on_disk: &'on_disk [u8],
470 474 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
471 475 read_nodes(on_disk, self.children)
472 476 }
473 477
474 478 pub(super) fn to_in_memory_node<'on_disk>(
475 479 &self,
476 480 on_disk: &'on_disk [u8],
477 481 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
478 482 Ok(dirstate_map::Node {
479 483 children: dirstate_map::ChildNodes::OnDisk(
480 484 self.children(on_disk)?,
481 485 ),
482 486 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
483 487 data: self.node_data()?,
484 488 descendants_with_entry_count: self
485 489 .descendants_with_entry_count
486 490 .get(),
487 491 tracked_descendants_count: self.tracked_descendants_count.get(),
488 492 })
489 493 }
490 494
491 495 fn from_dirstate_entry(
492 496 entry: &DirstateEntry,
493 497 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
494 498 let (
495 499 wdir_tracked,
496 500 p1_tracked,
497 501 p2_info,
498 502 mode_size_opt,
499 503 mtime_opt,
500 504 fallback_exec,
501 505 fallback_symlink,
502 506 ) = entry.v2_data();
503 507 // TODO: convert throug raw flag bits instead?
504 508 let mut flags = Flags::empty();
505 509 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
506 510 flags.set(Flags::P1_TRACKED, p1_tracked);
507 511 flags.set(Flags::P2_INFO, p2_info);
508 512 let size = if let Some((m, s)) = mode_size_opt {
509 513 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
510 514 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
511 515 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
512 516 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
513 517 flags.insert(Flags::HAS_MODE_AND_SIZE);
514 518 s.into()
515 519 } else {
516 520 0.into()
517 521 };
518 522 let mtime = if let Some(m) = mtime_opt {
519 523 flags.insert(Flags::HAS_MTIME);
520 524 if m.second_ambiguous {
521 525 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
522 526 };
523 527 m.into()
524 528 } else {
525 529 PackedTruncatedTimestamp::null()
526 530 };
527 531 if let Some(f_exec) = fallback_exec {
528 532 flags.insert(Flags::HAS_FALLBACK_EXEC);
529 533 if f_exec {
530 534 flags.insert(Flags::FALLBACK_EXEC);
531 535 }
532 536 }
533 537 if let Some(f_symlink) = fallback_symlink {
534 538 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
535 539 if f_symlink {
536 540 flags.insert(Flags::FALLBACK_SYMLINK);
537 541 }
538 542 }
539 543 (flags, size, mtime)
540 544 }
541 545 }
542 546
543 547 fn read_hg_path(
544 548 on_disk: &[u8],
545 549 slice: PathSlice,
546 550 ) -> Result<&HgPath, DirstateV2ParseError> {
547 551 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
548 552 }
549 553
550 554 fn read_nodes(
551 555 on_disk: &[u8],
552 556 slice: ChildNodes,
553 557 ) -> Result<&[Node], DirstateV2ParseError> {
554 558 read_slice(on_disk, slice.start, slice.len.get())
555 559 }
556 560
557 561 fn read_slice<T, Len>(
558 562 on_disk: &[u8],
559 563 start: Offset,
560 564 len: Len,
561 565 ) -> Result<&[T], DirstateV2ParseError>
562 566 where
563 567 T: BytesCast,
564 568 Len: TryInto<usize>,
565 569 {
566 570 // Either `usize::MAX` would result in "out of bounds" error since a single
567 571 // `&[u8]` cannot occupy the entire addess space.
568 572 let start = start.get().try_into().unwrap_or(std::usize::MAX);
569 573 let len = len.try_into().unwrap_or(std::usize::MAX);
570 574 on_disk
571 575 .get(start..)
572 576 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
573 577 .map(|(slice, _rest)| slice)
574 578 .ok_or_else(|| DirstateV2ParseError)
575 579 }
576 580
577 581 pub(crate) fn for_each_tracked_path<'on_disk>(
578 582 on_disk: &'on_disk [u8],
579 583 metadata: &[u8],
580 584 mut f: impl FnMut(&'on_disk HgPath),
581 585 ) -> Result<(), DirstateV2ParseError> {
582 586 let (meta, _) = TreeMetadata::from_bytes(metadata)
583 587 .map_err(|_| DirstateV2ParseError)?;
584 588 fn recur<'on_disk>(
585 589 on_disk: &'on_disk [u8],
586 590 nodes: ChildNodes,
587 591 f: &mut impl FnMut(&'on_disk HgPath),
588 592 ) -> Result<(), DirstateV2ParseError> {
589 593 for node in read_nodes(on_disk, nodes)? {
590 594 if let Some(entry) = node.entry()? {
591 595 if entry.state().is_tracked() {
592 596 f(node.full_path(on_disk)?)
593 597 }
594 598 }
595 599 recur(on_disk, node.children, f)?
596 600 }
597 601 Ok(())
598 602 }
599 603 recur(on_disk, meta.root_nodes, &mut f)
600 604 }
601 605
602 606 /// Returns new data and metadata, together with whether that data should be
603 607 /// appended to the existing data file whose content is at
604 608 /// `dirstate_map.on_disk` (true), instead of written to a new data file
605 609 /// (false), and the previous size of data on disk.
606 610 pub(super) fn write(
607 611 dirstate_map: &DirstateMap,
608 612 can_append: bool,
609 613 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
610 614 let append = can_append && dirstate_map.write_should_append();
611 615
612 616 // This ignores the space for paths, and for nodes without an entry.
613 617 // TODO: better estimate? Skip the `Vec` and write to a file directly?
614 618 let size_guess = std::mem::size_of::<Node>()
615 619 * dirstate_map.nodes_with_entry_count as usize;
616 620
617 621 let mut writer = Writer {
618 622 dirstate_map,
619 623 append,
620 624 out: Vec::with_capacity(size_guess),
621 625 };
622 626
623 627 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
624 628
625 629 let unreachable_bytes = if append {
626 630 dirstate_map.unreachable_bytes
627 631 } else {
628 632 0
629 633 };
630 634 let meta = TreeMetadata {
631 635 root_nodes,
632 636 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
633 637 nodes_with_copy_source_count: dirstate_map
634 638 .nodes_with_copy_source_count
635 639 .into(),
636 640 unreachable_bytes: unreachable_bytes.into(),
637 641 unused: [0; 4],
638 642 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
639 643 };
640 644 Ok((writer.out, meta, append, dirstate_map.old_data_size))
641 645 }
642 646
643 647 struct Writer<'dmap, 'on_disk> {
644 648 dirstate_map: &'dmap DirstateMap<'on_disk>,
645 649 append: bool,
646 650 out: Vec<u8>,
647 651 }
648 652
649 653 impl Writer<'_, '_> {
650 654 fn write_nodes(
651 655 &mut self,
652 656 nodes: dirstate_map::ChildNodesRef,
653 657 ) -> Result<ChildNodes, DirstateError> {
654 658 // Reuse already-written nodes if possible
655 659 if self.append {
656 660 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
657 661 let start = self.on_disk_offset_of(nodes_slice).expect(
658 662 "dirstate-v2 OnDisk nodes not found within on_disk",
659 663 );
660 664 let len = child_nodes_len_from_usize(nodes_slice.len());
661 665 return Ok(ChildNodes { start, len });
662 666 }
663 667 }
664 668
665 669 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
666 670 // undefined iteration order. Sort to enable binary search in the
667 671 // written file.
668 672 let nodes = nodes.sorted();
669 673 let nodes_len = nodes.len();
670 674
671 675 // First accumulate serialized nodes in a `Vec`
672 676 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
673 677 for node in nodes {
674 678 let children =
675 679 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
676 680 let full_path = node.full_path(self.dirstate_map.on_disk)?;
677 681 let full_path = self.write_path(full_path.as_bytes());
678 682 let copy_source = if let Some(source) =
679 683 node.copy_source(self.dirstate_map.on_disk)?
680 684 {
681 685 self.write_path(source.as_bytes())
682 686 } else {
683 687 PathSlice {
684 688 start: 0.into(),
685 689 len: 0.into(),
686 690 }
687 691 };
688 692 on_disk_nodes.push(match node {
689 693 NodeRef::InMemory(path, node) => {
690 694 let (flags, size, mtime) = match &node.data {
691 695 dirstate_map::NodeData::Entry(entry) => {
692 696 Node::from_dirstate_entry(entry)
693 697 }
694 698 dirstate_map::NodeData::CachedDirectory { mtime } => {
695 699 // we currently never set a mtime if unknown file
696 700 // are present.
697 701 // So if we have a mtime for a directory, we know
698 702 // they are no unknown
699 703 // files and we
700 704 // blindly set ALL_UNKNOWN_RECORDED.
701 705 //
702 706 // We never set ALL_IGNORED_RECORDED since we
703 707 // don't track that case
704 708 // currently.
705 709 let mut flags = Flags::DIRECTORY
706 710 | Flags::HAS_MTIME
707 711 | Flags::ALL_UNKNOWN_RECORDED;
708 712 if mtime.second_ambiguous {
709 713 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
710 714 }
711 715 (flags, 0.into(), (*mtime).into())
712 716 }
713 717 dirstate_map::NodeData::None => (
714 718 Flags::DIRECTORY,
715 719 0.into(),
716 720 PackedTruncatedTimestamp::null(),
717 721 ),
718 722 };
719 723 Node {
720 724 children,
721 725 copy_source,
722 726 full_path,
723 727 base_name_start: u16::try_from(path.base_name_start())
724 728 // Could only panic for paths over 64 KiB
725 729 .expect("dirstate-v2 path length overflow")
726 730 .into(),
727 731 descendants_with_entry_count: node
728 732 .descendants_with_entry_count
729 733 .into(),
730 734 tracked_descendants_count: node
731 735 .tracked_descendants_count
732 736 .into(),
733 737 flags: flags.bits().into(),
734 738 size,
735 739 mtime,
736 740 }
737 741 }
738 742 NodeRef::OnDisk(node) => Node {
739 743 children,
740 744 copy_source,
741 745 full_path,
742 746 ..*node
743 747 },
744 748 })
745 749 }
746 750 // … so we can write them contiguously, after writing everything else
747 751 // they refer to.
748 752 let start = self.current_offset();
749 753 let len = child_nodes_len_from_usize(nodes_len);
750 754 self.out.extend(on_disk_nodes.as_bytes());
751 755 Ok(ChildNodes { start, len })
752 756 }
753 757
754 758 /// If the given slice of items is within `on_disk`, returns its offset
755 759 /// from the start of `on_disk`.
756 760 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
757 761 where
758 762 T: BytesCast,
759 763 {
760 764 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
761 765 let start = slice.as_ptr() as usize;
762 766 let end = start + slice.len();
763 767 start..=end
764 768 }
765 769 let slice_addresses = address_range(slice.as_bytes());
766 770 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
767 771 if on_disk_addresses.contains(slice_addresses.start())
768 772 && on_disk_addresses.contains(slice_addresses.end())
769 773 {
770 774 let offset = slice_addresses.start() - on_disk_addresses.start();
771 775 Some(offset_from_usize(offset))
772 776 } else {
773 777 None
774 778 }
775 779 }
776 780
777 781 fn current_offset(&mut self) -> Offset {
778 782 let mut offset = self.out.len();
779 783 if self.append {
780 784 offset += self.dirstate_map.on_disk.len()
781 785 }
782 786 offset_from_usize(offset)
783 787 }
784 788
785 789 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
786 790 let len = path_len_from_usize(slice.len());
787 791 // Reuse an already-written path if possible
788 792 if self.append {
789 793 if let Some(start) = self.on_disk_offset_of(slice) {
790 794 return PathSlice { start, len };
791 795 }
792 796 }
793 797 let start = self.current_offset();
794 798 self.out.extend(slice.as_bytes());
795 799 PathSlice { start, len }
796 800 }
797 801 }
798 802
799 803 fn offset_from_usize(x: usize) -> Offset {
800 804 u32::try_from(x)
801 805 // Could only panic for a dirstate file larger than 4 GiB
802 806 .expect("dirstate-v2 offset overflow")
803 807 .into()
804 808 }
805 809
806 810 fn child_nodes_len_from_usize(x: usize) -> Size {
807 811 u32::try_from(x)
808 812 // Could only panic with over 4 billion nodes
809 813 .expect("dirstate-v2 slice length overflow")
810 814 .into()
811 815 }
812 816
813 817 fn path_len_from_usize(x: usize) -> PathSize {
814 818 u16::try_from(x)
815 819 // Could only panic for paths over 64 KiB
816 820 .expect("dirstate-v2 path length overflow")
817 821 .into()
818 822 }
819 823
820 824 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
821 825 fn from(timestamp: TruncatedTimestamp) -> Self {
822 826 Self {
823 827 truncated_seconds: timestamp.truncated_seconds().into(),
824 828 nanoseconds: timestamp.nanoseconds().into(),
825 829 }
826 830 }
827 831 }
828 832
829 833 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
830 834 type Error = DirstateV2ParseError;
831 835
832 836 fn try_from(
833 837 timestamp: PackedTruncatedTimestamp,
834 838 ) -> Result<Self, Self::Error> {
835 839 Self::from_already_truncated(
836 840 timestamp.truncated_seconds.get(),
837 841 timestamp.nanoseconds.get(),
838 842 false,
839 843 )
840 844 }
841 845 }
842 846 impl PackedTruncatedTimestamp {
843 847 fn null() -> Self {
844 848 Self {
845 849 truncated_seconds: 0.into(),
846 850 nanoseconds: 0.into(),
847 851 }
848 852 }
849 853 }
@@ -1,849 +1,864 b''
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate::status::StatusPath;
4 4 use crate::dirstate_tree::dirstate_map::BorrowedPath;
5 5 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
6 6 use crate::dirstate_tree::dirstate_map::DirstateMap;
7 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 8 use crate::dirstate_tree::dirstate_map::NodeData;
8 9 use crate::dirstate_tree::dirstate_map::NodeRef;
9 10 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
10 11 use crate::matchers::get_ignore_function;
11 12 use crate::matchers::Matcher;
12 13 use crate::utils::files::get_bytes_from_os_string;
13 14 use crate::utils::files::get_path_from_bytes;
14 15 use crate::utils::hg_path::HgPath;
15 16 use crate::BadMatch;
16 17 use crate::DirstateStatus;
17 18 use crate::EntryState;
18 19 use crate::HgPathBuf;
19 20 use crate::HgPathCow;
20 21 use crate::PatternFileWarning;
21 22 use crate::StatusError;
22 23 use crate::StatusOptions;
23 24 use micro_timer::timed;
24 25 use rayon::prelude::*;
25 26 use sha1::{Digest, Sha1};
26 27 use std::borrow::Cow;
27 28 use std::io;
28 29 use std::path::Path;
29 30 use std::path::PathBuf;
30 31 use std::sync::Mutex;
31 32 use std::time::SystemTime;
32 33
/// Returns the status of the working directory compared to its parent
/// changeset.
///
/// This algorithm is based on traversing the filesystem tree (`fs` in function
/// and variable names) and dirstate tree at the same time. The core of this
/// traversal is the recursive `traverse_fs_directory_and_dirstate` function
/// and its use of `itertools::merge_join_by`. When reaching a path that only
/// exists in one of the two trees, depending on information requested by
/// `options` we may need to traverse the remaining subtree.
#[timed]
pub fn status<'dirstate>(
    dmap: &'dirstate mut DirstateMap,
    matcher: &(dyn Matcher + Sync),
    root_dir: PathBuf,
    ignore_files: Vec<PathBuf>,
    options: StatusOptions,
) -> Result<(DirstateStatus<'dirstate>, Vec<PatternFileWarning>), StatusError>
{
    // Force the global rayon threadpool to not exceed 16 concurrent threads.
    // This is a stop-gap measure until we figure out why using more than 16
    // threads makes `status` slower for each additional thread.
    // We use `ok()` in case the global threadpool has already been
    // instantiated in `rhg` or some other caller.
    // TODO find the underlying cause and fix it, then remove this.
    rayon::ThreadPoolBuilder::new()
        .num_threads(16)
        .build_global()
        .ok();

    let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
        if options.list_ignored || options.list_unknown {
            // The hash of the ignore patterns is only persisted by
            // dirstate-v2, so only compute it for that format. Reporting a
            // hash change for v1 would needlessly mark the dirstate dirty.
            let (ignore_fn, warnings, changed) = match dmap.dirstate_version {
                DirstateVersion::V1 => {
                    let (ignore_fn, warnings) = get_ignore_function(
                        ignore_files,
                        &root_dir,
                        &mut |_pattern_bytes| {},
                    )?;
                    // `None`: we did not check whether patterns changed
                    (ignore_fn, warnings, None)
                }
                DirstateVersion::V2 => {
                    let mut hasher = Sha1::new();
                    let (ignore_fn, warnings) = get_ignore_function(
                        ignore_files,
                        &root_dir,
                        &mut |pattern_bytes| hasher.update(pattern_bytes),
                    )?;
                    let new_hash = *hasher.finalize().as_ref();
                    let changed = new_hash != dmap.ignore_patterns_hash;
                    dmap.ignore_patterns_hash = new_hash;
                    (ignore_fn, warnings, Some(changed))
                }
            };
            (ignore_fn, warnings, changed)
        } else {
            // Neither ignored nor unknown files are listed: skip reading
            // `.hgignore` entirely and accept every path.
            (Box::new(|&_| true), vec![], None)
        };

    let filesystem_time_at_status_start =
        filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);

    // If the repository is under the current directory, prefer using a
    // relative path, so the kernel needs to traverse fewer directories in
    // every call to `read_dir` or `symlink_metadata`.
    // This is effective in the common case where the current directory is the
    // repository root.

    // TODO: Better yet would be to use libc functions like `openat` and
    // `fstatat` to remove such repeated traversals entirely, but the standard
    // library does not provide APIs based on those.
    // Maybe with a crate like https://crates.io/crates/openat instead?
    let root_dir = if let Some(relative) = std::env::current_dir()
        .ok()
        .and_then(|cwd| root_dir.strip_prefix(cwd).ok())
    {
        relative
    } else {
        &root_dir
    };

    let outcome = DirstateStatus {
        filesystem_time_at_status_start,
        ..Default::default()
    };
    let common = StatusCommon {
        dmap,
        options,
        matcher,
        ignore_fn,
        outcome: Mutex::new(outcome),
        ignore_patterns_have_changed: patterns_changed,
        new_cachable_directories: Default::default(),
        outated_cached_directories: Default::default(),
        filesystem_time_at_status_start,
    };
    let is_at_repo_root = true;
    let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
    let has_ignored_ancestor = false;
    let root_cached_mtime = None;
    let root_dir_metadata = None;
    // If the path we have for the repository root is a symlink, do follow it.
    // (As opposed to symlinks within the working directory which are not
    // followed, using `std::fs::symlink_metadata`.)
    common.traverse_fs_directory_and_dirstate(
        has_ignored_ancestor,
        dmap.root.as_ref(),
        hg_path,
        &root_dir,
        root_dir_metadata,
        root_cached_mtime,
        is_at_repo_root,
    )?;
    let mut outcome = common.outcome.into_inner().unwrap();
    let new_cachable = common.new_cachable_directories.into_inner().unwrap();
    let outdated = common.outated_cached_directories.into_inner().unwrap();

    // Directory mtime caching is dirstate-v2-only data: new cachable
    // directories must not mark a v1 dirstate dirty, since v1 cannot store
    // them and would be pointlessly rewritten.
    outcome.dirty = common.ignore_patterns_have_changed == Some(true)
        || !outdated.is_empty()
        || (!new_cachable.is_empty()
            && dmap.dirstate_version == DirstateVersion::V2);

    // Remove outdated mtimes before adding new mtimes, in case a given
    // directory is both
    for path in &outdated {
        let node = dmap.get_or_insert(path)?;
        if let NodeData::CachedDirectory { .. } = &node.data {
            node.data = NodeData::None
        }
    }
    for (path, mtime) in &new_cachable {
        let node = dmap.get_or_insert(path)?;
        match &node.data {
            NodeData::Entry(_) => {} // Don't overwrite an entry
            NodeData::CachedDirectory { .. } | NodeData::None => {
                node.data = NodeData::CachedDirectory { mtime: *mtime }
            }
        }
    }

    Ok((outcome, warnings))
}
160 175
/// Bag of random things needed by various parts of the algorithm. Reduces the
/// number of parameters passed to functions.
struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
    dmap: &'tree DirstateMap<'on_disk>,
    options: StatusOptions,
    matcher: &'a (dyn Matcher + Sync),
    ignore_fn: IgnoreFnType<'a>,
    // Accumulated results, behind a mutex because traversal is parallel.
    outcome: Mutex<DirstateStatus<'on_disk>>,
    // Directories found eligible for `read_dir` caching during this run,
    // with the directory mtime to record for each.
    new_cachable_directories:
        Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
    // Directories whose cached `read_dir` data should be dropped.
    outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,

    /// Whether ignore files like `.hgignore` have changed since the previous
    /// time a `status()` call wrote their hash to the dirstate. `None` means
    /// we don't know as this run doesn't list either ignored or unknown files
    /// and therefore isn't reading `.hgignore`.
    ignore_patterns_have_changed: Option<bool>,

    /// The current time at the start of the `status()` algorithm, as measured
    /// and possibly truncated by the filesystem.
    filesystem_time_at_status_start: Option<TruncatedTimestamp>,
}
183 198
/// Which bucket of the `DirstateStatus` result a path belongs to;
/// dispatched on in `push_outcome_common`.
enum Outcome {
    Modified,
    Added,
    Removed,
    Deleted,
    Clean,
    Ignored,
    Unknown,
    Unsure,
}
194 209
195 210 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
    /// Record `dirstate_node` in the result bucket `which`, attaching its
    /// copy source when `options.list_copies` is set.
    fn push_outcome(
        &self,
        which: Outcome,
        dirstate_node: &NodeRef<'tree, 'on_disk>,
    ) -> Result<(), DirstateV2ParseError> {
        let path = dirstate_node
            .full_path_borrowed(self.dmap.on_disk)?
            .detach_from_tree();
        let copy_source = if self.options.list_copies {
            dirstate_node
                .copy_source_borrowed(self.dmap.on_disk)?
                .map(|source| source.detach_from_tree())
        } else {
            None
        };
        self.push_outcome_common(which, path, copy_source);
        Ok(())
    }
214 229
    /// Like `push_outcome`, but never records a copy source.
    fn push_outcome_without_copy_source(
        &self,
        which: Outcome,
        path: &BorrowedPath<'_, 'on_disk>,
    ) {
        self.push_outcome_common(which, path.detach_from_tree(), None)
    }
222 237
    /// Append a `StatusPath` to the outcome list selected by `which`,
    /// taking the outcome mutex for the duration of the push.
    fn push_outcome_common(
        &self,
        which: Outcome,
        path: HgPathCow<'on_disk>,
        copy_source: Option<HgPathCow<'on_disk>>,
    ) {
        let mut outcome = self.outcome.lock().unwrap();
        let vec = match which {
            Outcome::Modified => &mut outcome.modified,
            Outcome::Added => &mut outcome.added,
            Outcome::Removed => &mut outcome.removed,
            Outcome::Deleted => &mut outcome.deleted,
            Outcome::Clean => &mut outcome.clean,
            Outcome::Ignored => &mut outcome.ignored,
            Outcome::Unknown => &mut outcome.unknown,
            Outcome::Unsure => &mut outcome.unsure,
        };
        vec.push(StatusPath { path, copy_source });
    }
242 257
    /// List the entries of `fs_path`; on failure, report the I/O error
    /// against `hg_path` (via `io_error`) and return `Err(())`.
    fn read_dir(
        &self,
        hg_path: &HgPath,
        fs_path: &Path,
        is_at_repo_root: bool,
    ) -> Result<Vec<DirEntry>, ()> {
        DirEntry::read_dir(fs_path, is_at_repo_root)
            .map_err(|error| self.io_error(error, hg_path))
    }
252 267
    /// Record an OS-level error for `hg_path` in the outcome's `bad` list.
    fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
        let errno = error.raw_os_error().expect("expected real OS error");
        self.outcome
            .lock()
            .unwrap()
            .bad
            .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
    }
261 276
    /// If the ignore patterns changed since this node's directory mtime was
    /// cached, its cached `read_dir` data can no longer be trusted: queue it
    /// for removal from the dirstate.
    fn check_for_outdated_directory_cache(
        &self,
        dirstate_node: &NodeRef<'tree, 'on_disk>,
    ) -> Result<(), DirstateV2ParseError> {
        if self.ignore_patterns_have_changed == Some(true)
            && dirstate_node.cached_directory_mtime()?.is_some()
        {
            self.outated_cached_directories.lock().unwrap().push(
                dirstate_node
                    .full_path_borrowed(self.dmap.on_disk)?
                    .detach_from_tree(),
            )
        }
        Ok(())
    }
277 292
    /// If this returns true, we can get accurate results by only using
    /// `symlink_metadata` for child nodes that exist in the dirstate and don't
    /// need to call `read_dir`.
    fn can_skip_fs_readdir(
        &self,
        directory_metadata: Option<&std::fs::Metadata>,
        cached_directory_mtime: Option<TruncatedTimestamp>,
    ) -> bool {
        if !self.options.list_unknown && !self.options.list_ignored {
            // All states that we care about listing have corresponding
            // dirstate entries.
            // This happens for example with `hg status -mard`.
            return true;
        }
        if !self.options.list_ignored
            && self.ignore_patterns_have_changed == Some(false)
        {
            if let Some(cached_mtime) = cached_directory_mtime {
                // The dirstate contains a cached mtime for this directory, set
                // by a previous run of the `status` algorithm which found this
                // directory eligible for `read_dir` caching.
                if let Some(meta) = directory_metadata {
                    if cached_mtime
                        .likely_equal_to_mtime_of(meta)
                        .unwrap_or(false)
                    {
                        // The mtime of that directory has not changed
                        // since then, which means that the results of
                        // `read_dir` should also be unchanged.
                        return true;
                    }
                }
            }
        }
        false
    }
314 329
    /// Returns whether all child entries of the filesystem directory have a
    /// corresponding dirstate node or are ignored.
    fn traverse_fs_directory_and_dirstate(
        &self,
        has_ignored_ancestor: bool,
        dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
        directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
        directory_fs_path: &Path,
        directory_metadata: Option<&std::fs::Metadata>,
        cached_directory_mtime: Option<TruncatedTimestamp>,
        is_at_repo_root: bool,
    ) -> Result<bool, DirstateV2ParseError> {
        if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
        {
            // Fast path: stat only the paths the dirstate already knows
            // about, in parallel, instead of listing the whole directory.
            dirstate_nodes
                .par_iter()
                .map(|dirstate_node| {
                    let fs_path = directory_fs_path.join(get_path_from_bytes(
                        dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
                    ));
                    match std::fs::symlink_metadata(&fs_path) {
                        Ok(fs_metadata) => self.traverse_fs_and_dirstate(
                            &fs_path,
                            &fs_metadata,
                            dirstate_node,
                            has_ignored_ancestor,
                        ),
                        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                            self.traverse_dirstate_only(dirstate_node)
                        }
                        Err(error) => {
                            let hg_path =
                                dirstate_node.full_path(self.dmap.on_disk)?;
                            Ok(self.io_error(error, hg_path))
                        }
                    }
                })
                .collect::<Result<_, _>>()?;

            // We don't know, so conservatively say this isn't the case
            let children_all_have_dirstate_node_or_are_ignored = false;

            return Ok(children_all_have_dirstate_node_or_are_ignored);
        }

        let mut fs_entries = if let Ok(entries) = self.read_dir(
            directory_hg_path,
            directory_fs_path,
            is_at_repo_root,
        ) {
            entries
        } else {
            // Treat an unreadable directory (typically because of insufficient
            // permissions) like an empty directory. `self.read_dir` has
            // already called `self.io_error` so a warning will be emitted.
            Vec::new()
        };

        // `merge_join_by` requires both its input iterators to be sorted:

        let dirstate_nodes = dirstate_nodes.sorted();
        // `sort_unstable_by_key` doesn't allow keys borrowing from the value:
        // https://github.com/rust-lang/rust/issues/34162
        fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));

        // Propagate here any error that would happen inside the comparison
        // callback below
        for dirstate_node in &dirstate_nodes {
            dirstate_node.base_name(self.dmap.on_disk)?;
        }
        itertools::merge_join_by(
            dirstate_nodes,
            &fs_entries,
            |dirstate_node, fs_entry| {
                // This `unwrap` never panics because we already propagated
                // those errors above
                dirstate_node
                    .base_name(self.dmap.on_disk)
                    .unwrap()
                    .cmp(&fs_entry.base_name)
            },
        )
        .par_bridge()
        .map(|pair| {
            use itertools::EitherOrBoth::*;
            let has_dirstate_node_or_is_ignored;
            match pair {
                // Path exists in both the dirstate and on the filesystem
                Both(dirstate_node, fs_entry) => {
                    self.traverse_fs_and_dirstate(
                        &fs_entry.full_path,
                        &fs_entry.metadata,
                        dirstate_node,
                        has_ignored_ancestor,
                    )?;
                    has_dirstate_node_or_is_ignored = true
                }
                // Path only exists in the dirstate
                Left(dirstate_node) => {
                    self.traverse_dirstate_only(dirstate_node)?;
                    has_dirstate_node_or_is_ignored = true;
                }
                // Path only exists on the filesystem
                Right(fs_entry) => {
                    has_dirstate_node_or_is_ignored = self.traverse_fs_only(
                        has_ignored_ancestor,
                        directory_hg_path,
                        fs_entry,
                    )
                }
            }
            Ok(has_dirstate_node_or_is_ignored)
        })
        .try_reduce(|| true, |a, b| Ok(a && b))
    }
427 442
    /// Visit a path that exists both in the dirstate tree and on the
    /// filesystem, classifying it and recursing into directories.
    fn traverse_fs_and_dirstate(
        &self,
        fs_path: &Path,
        fs_metadata: &std::fs::Metadata,
        dirstate_node: NodeRef<'tree, 'on_disk>,
        has_ignored_ancestor: bool,
    ) -> Result<(), DirstateV2ParseError> {
        self.check_for_outdated_directory_cache(&dirstate_node)?;
        let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
        let file_type = fs_metadata.file_type();
        let file_or_symlink = file_type.is_file() || file_type.is_symlink();
        if !file_or_symlink {
            // If we previously had a file here, it was removed (with
            // `hg rm` or similar) or deleted before it could be
            // replaced by a directory or something else.
            self.mark_removed_or_deleted_if_file(&dirstate_node)?;
        }
        if file_type.is_dir() {
            if self.options.collect_traversed_dirs {
                self.outcome
                    .lock()
                    .unwrap()
                    .traversed
                    .push(hg_path.detach_from_tree())
            }
            let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
            let is_at_repo_root = false;
            let children_all_have_dirstate_node_or_are_ignored = self
                .traverse_fs_directory_and_dirstate(
                    is_ignored,
                    dirstate_node.children(self.dmap.on_disk)?,
                    hg_path,
                    fs_path,
                    Some(fs_metadata),
                    dirstate_node.cached_directory_mtime()?,
                    is_at_repo_root,
                )?;
            self.maybe_save_directory_mtime(
                children_all_have_dirstate_node_or_are_ignored,
                fs_metadata,
                dirstate_node,
            )?
        } else {
            if file_or_symlink && self.matcher.matches(hg_path) {
                if let Some(state) = dirstate_node.state()? {
                    match state {
                        EntryState::Added => {
                            self.push_outcome(Outcome::Added, &dirstate_node)?
                        }
                        EntryState::Removed => self
                            .push_outcome(Outcome::Removed, &dirstate_node)?,
                        EntryState::Merged => self
                            .push_outcome(Outcome::Modified, &dirstate_node)?,
                        EntryState::Normal => self
                            .handle_normal_file(&dirstate_node, fs_metadata)?,
                    }
                } else {
                    // `node.entry.is_none()` indicates a "directory"
                    // node, but the filesystem has a file
                    self.mark_unknown_or_ignored(
                        has_ignored_ancestor,
                        hg_path,
                    );
                }
            }

            // The dirstate had a directory here but the filesystem does not:
            // everything below it is dirstate-only.
            for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
            {
                self.traverse_dirstate_only(child_node)?
            }
        }
        Ok(())
    }
501 516
    /// Queue this directory's mtime for caching in the dirstate when its
    /// `read_dir` results are fully reconstructible from dirstate nodes and
    /// the mtime is reliable (old enough relative to `status` start).
    fn maybe_save_directory_mtime(
        &self,
        children_all_have_dirstate_node_or_are_ignored: bool,
        directory_metadata: &std::fs::Metadata,
        dirstate_node: NodeRef<'tree, 'on_disk>,
    ) -> Result<(), DirstateV2ParseError> {
        if !children_all_have_dirstate_node_or_are_ignored {
            return Ok(());
        }
        // All filesystem directory entries from `read_dir` have a
        // corresponding node in the dirstate, so we can reconstitute the
        // names of those entries without calling `read_dir` again.

        // TODO: use let-else here and below when available:
        // https://github.com/rust-lang/rust/issues/87335
        let status_start = if let Some(status_start) =
            &self.filesystem_time_at_status_start
        {
            status_start
        } else {
            return Ok(());
        };

        // Although the Rust standard library's `SystemTime` type
        // has nanosecond precision, the times reported for a
        // directory's (or file's) modified time may have lower
        // resolution based on the filesystem (for example ext3
        // only stores integer seconds), kernel (see
        // https://stackoverflow.com/a/14393315/1162888), etc.
        let directory_mtime = if let Ok(option) =
            TruncatedTimestamp::for_reliable_mtime_of(
                directory_metadata,
                status_start,
            ) {
            if let Some(directory_mtime) = option {
                directory_mtime
            } else {
                // The directory was modified too recently,
                // don't cache its `read_dir` results.
                //
                // 1. A change to this directory (direct child was
                //    added or removed) causes its mtime to be set
                //    (possibly truncated) to `directory_mtime`
                // 2. This `status` algorithm calls `read_dir`
                // 3. Another change is made to the same directory
                //    so that calling `read_dir` again would give
                //    different results, but soon enough after 1. that
                //    the mtime stays the same
                //
                // On a system where the time resolution is poor, this
                // scenario is not unlikely if all three steps are caused
                // by the same script.
                return Ok(());
            }
        } else {
            // OS/libc does not support mtime?
            return Ok(());
        };
        // We've observed (through `status_start`) that time has
        // "progressed" since `directory_mtime`, so any further
        // change to this directory is extremely likely to cause a
        // different mtime.
        //
        // Having the same mtime again is not entirely impossible
        // since the system clock is not monotonic. It could jump
        // backward to some point before `directory_mtime`, then a
        // directory change could potentially happen during exactly
        // the wrong tick.
        //
        // We deem this scenario (unlike the previous one) to be
        // unlikely enough in practice.

        let is_up_to_date =
            if let Some(cached) = dirstate_node.cached_directory_mtime()? {
                cached.likely_equal(directory_mtime)
            } else {
                false
            };
        if !is_up_to_date {
            let hg_path = dirstate_node
                .full_path_borrowed(self.dmap.on_disk)?
                .detach_from_tree();
            self.new_cachable_directories
                .lock()
                .unwrap()
                .push((hg_path, directory_mtime))
        }
        Ok(())
    }
591 606
    /// A file with `EntryState::Normal` in the dirstate was found in the
    /// filesystem: decide between Modified, Unsure, and Clean by comparing
    /// recorded size/mode/mtime against the filesystem metadata.
    fn handle_normal_file(
        &self,
        dirstate_node: &NodeRef<'tree, 'on_disk>,
        fs_metadata: &std::fs::Metadata,
    ) -> Result<(), DirstateV2ParseError> {
        // Keep the low 31 bits, matching how the dirstate stores sizes
        fn truncate_u64(value: u64) -> i32 {
            (value & 0x7FFF_FFFF) as i32
        }

        let entry = dirstate_node
            .entry()?
            .expect("handle_normal_file called with entry-less node");
        // Lazy: mode comparison is only needed when size did not change
        let mode_changed =
            || self.options.check_exec && entry.mode_changed(fs_metadata);
        let size = entry.size();
        let size_changed = size != truncate_u64(fs_metadata.len());
        if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
            // issue6456: Size returned may be longer due to encryption
            // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
            self.push_outcome(Outcome::Unsure, dirstate_node)?
        } else if dirstate_node.has_copy_source()
            || entry.is_from_other_parent()
            || (size >= 0 && (size_changed || mode_changed()))
        {
            self.push_outcome(Outcome::Modified, dirstate_node)?
        } else {
            let mtime_looks_clean;
            if let Some(dirstate_mtime) = entry.truncated_mtime() {
                let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
                    .expect("OS/libc does not support mtime?");
                // There might be a change in the future if for example the
                // internal clock become off while process run, but this is a
                // case where the issues the user would face
                // would be a lot worse and there is nothing we
                // can really do.
                mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
            } else {
                // No mtime in the dirstate entry
                mtime_looks_clean = false
            };
            if !mtime_looks_clean {
                self.push_outcome(Outcome::Unsure, dirstate_node)?
            } else if self.options.list_clean {
                self.push_outcome(Outcome::Clean, dirstate_node)?
            }
        }
        Ok(())
    }
643 658
    /// A node in the dirstate tree has no corresponding filesystem entry:
    /// mark it (and, in parallel, all of its descendants) removed or deleted.
    fn traverse_dirstate_only(
        &self,
        dirstate_node: NodeRef<'tree, 'on_disk>,
    ) -> Result<(), DirstateV2ParseError> {
        self.check_for_outdated_directory_cache(&dirstate_node)?;
        self.mark_removed_or_deleted_if_file(&dirstate_node)?;
        dirstate_node
            .children(self.dmap.on_disk)?
            .par_iter()
            .map(|child_node| self.traverse_dirstate_only(child_node))
            .collect()
    }
657 672
    /// A node in the dirstate tree has no corresponding *file* on the
    /// filesystem
    ///
    /// Does nothing on a "directory" node
    fn mark_removed_or_deleted_if_file(
        &self,
        dirstate_node: &NodeRef<'tree, 'on_disk>,
    ) -> Result<(), DirstateV2ParseError> {
        if let Some(state) = dirstate_node.state()? {
            let path = dirstate_node.full_path(self.dmap.on_disk)?;
            if self.matcher.matches(path) {
                if let EntryState::Removed = state {
                    self.push_outcome(Outcome::Removed, dirstate_node)?
                } else {
                    // Any other tracked state with a missing file is Deleted
                    self.push_outcome(Outcome::Deleted, &dirstate_node)?
                }
            }
        }
        Ok(())
    }
678 693
679 694 /// Something in the filesystem has no corresponding dirstate node
680 695 ///
681 696 /// Returns whether that path is ignored
682 697 fn traverse_fs_only(
683 698 &self,
684 699 has_ignored_ancestor: bool,
685 700 directory_hg_path: &HgPath,
686 701 fs_entry: &DirEntry,
687 702 ) -> bool {
688 703 let hg_path = directory_hg_path.join(&fs_entry.base_name);
689 704 let file_type = fs_entry.metadata.file_type();
690 705 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
691 706 if file_type.is_dir() {
692 707 let is_ignored =
693 708 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
694 709 let traverse_children = if is_ignored {
695 710 // Descendants of an ignored directory are all ignored
696 711 self.options.list_ignored
697 712 } else {
698 713 // Descendants of an unknown directory may be either unknown or
699 714 // ignored
700 715 self.options.list_unknown || self.options.list_ignored
701 716 };
702 717 if traverse_children {
703 718 let is_at_repo_root = false;
704 719 if let Ok(children_fs_entries) = self.read_dir(
705 720 &hg_path,
706 721 &fs_entry.full_path,
707 722 is_at_repo_root,
708 723 ) {
709 724 children_fs_entries.par_iter().for_each(|child_fs_entry| {
710 725 self.traverse_fs_only(
711 726 is_ignored,
712 727 &hg_path,
713 728 child_fs_entry,
714 729 );
715 730 })
716 731 }
717 732 }
718 733 if self.options.collect_traversed_dirs {
719 734 self.outcome.lock().unwrap().traversed.push(hg_path.into())
720 735 }
721 736 is_ignored
722 737 } else {
723 738 if file_or_symlink {
724 739 if self.matcher.matches(&hg_path) {
725 740 self.mark_unknown_or_ignored(
726 741 has_ignored_ancestor,
727 742 &BorrowedPath::InMemory(&hg_path),
728 743 )
729 744 } else {
730 745 // We haven’t computed whether this path is ignored. It
731 746 // might not be, and a future run of status might have a
732 747 // different matcher that matches it. So treat it as not
733 748 // ignored. That is, inhibit readdir caching of the parent
734 749 // directory.
735 750 false
736 751 }
737 752 } else {
738 753 // This is neither a directory, a plain file, or a symlink.
739 754 // Treat it like an ignored file.
740 755 true
741 756 }
742 757 }
743 758 }
744 759
    /// Record `hg_path` as Ignored or Unknown (when the corresponding listing
    /// option is enabled).
    ///
    /// Returns whether that path is ignored
    fn mark_unknown_or_ignored(
        &self,
        has_ignored_ancestor: bool,
        hg_path: &BorrowedPath<'_, 'on_disk>,
    ) -> bool {
        let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
        if is_ignored {
            if self.options.list_ignored {
                self.push_outcome_without_copy_source(
                    Outcome::Ignored,
                    hg_path,
                )
            }
        } else {
            if self.options.list_unknown {
                self.push_outcome_without_copy_source(
                    Outcome::Unknown,
                    hg_path,
                )
            }
        }
        is_ignored
    }
769 784 }
770 785
/// One filesystem directory entry, as collected by `DirEntry::read_dir`.
struct DirEntry {
    base_name: HgPathBuf,
    full_path: PathBuf,
    metadata: std::fs::Metadata,
}
776 791
impl DirEntry {
    /// Returns **unsorted** entries in the given directory, with name and
    /// metadata.
    ///
    /// If a `.hg` sub-directory is encountered:
    ///
    /// * At the repository root, ignore that sub-directory
    /// * Elsewhere, we're listing the content of a sub-repo. Return an empty
    ///   list instead.
    fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
        // `read_dir` returns a "not found" error for the empty path
        let at_cwd = path == Path::new("");
        let read_dir_path = if at_cwd { Path::new(".") } else { path };
        let mut results = Vec::new();
        for entry in read_dir_path.read_dir()? {
            let entry = entry?;
            let metadata = match entry.metadata() {
                Ok(v) => v,
                Err(e) => {
                    // race with file deletion?
                    if e.kind() == std::io::ErrorKind::NotFound {
                        continue;
                    } else {
                        return Err(e);
                    }
                }
            };
            let file_name = entry.file_name();
            // FIXME don't do this when cached
            if file_name == ".hg" {
                if is_at_repo_root {
                    // Skip the repo's own .hg (might be a symlink)
                    continue;
                } else if metadata.is_dir() {
                    // A .hg sub-directory at another location means a subrepo,
                    // skip it entirely.
                    return Ok(Vec::new());
                }
            }
            let full_path = if at_cwd {
                file_name.clone().into()
            } else {
                entry.path()
            };
            let base_name = get_bytes_from_os_string(file_name).into();
            results.push(DirEntry {
                base_name,
                full_path,
                metadata,
            })
        }
        Ok(results)
    }
}
831 846
/// Return the `mtime` of a temporary file newly-created in the `.hg`
/// directory of the given repository.
///
/// This is similar to `SystemTime::now()`, with the result truncated to the
/// same time resolution as other files' modification times. Using `.hg`
/// instead of the system's default temporary directory (such as `/tmp`) makes
/// it more likely the temporary file is in the same disk partition as contents
/// of the working directory, which can matter since different filesystems may
/// store timestamps with different resolutions.
///
/// This may fail, typically if we lack write permissions. In that case we
/// should continue the `status()` algorithm anyway and consider the current
/// date/time to be unknown.
fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
    tempfile::tempfile_in(repo_root.join(".hg"))?
        .metadata()?
        .modified()
}
General Comments 0
You need to be logged in to leave comments. Login now