##// END OF EJS Templates
status: Extend read_dir caching to directories with ignored files...
Simon Sapin -
r48269:94e38822 default
parent child Browse files
Show More
@@ -1,600 +1,603 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! # File format
4 4 //!
5 5 //! The file starts with a fixed-sized header, whose layout is defined by the
6 6 //! `Header` struct. Its `root` field contains the slice (offset and length) to
7 7 //! the nodes representing the files and directories at the root of the
8 8 //! repository. Each node is also fixed-size, defined by the `Node` struct.
9 9 //! Nodes in turn contain slices to variable-size paths, and to their own child
10 10 //! nodes (if any) for nested files and directories.
11 11
12 12 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
13 13 use crate::dirstate_tree::path_with_basename::WithBasename;
14 14 use crate::errors::HgError;
15 15 use crate::utils::hg_path::HgPath;
16 16 use crate::DirstateEntry;
17 17 use crate::DirstateError;
18 18 use crate::DirstateParents;
19 19 use crate::EntryState;
20 20 use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
21 21 use bytes_cast::BytesCast;
22 22 use std::borrow::Cow;
23 23 use std::convert::TryFrom;
24 24 use std::time::{Duration, SystemTime, UNIX_EPOCH};
25 25
26 26 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
27 27 /// This a redundant sanity check more than an actual "magic number" since
28 28 /// `.hg/requires` already governs which format should be used.
29 29 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
30 30
31 31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33 33
34 34 #[derive(BytesCast)]
35 35 #[repr(C)]
36 36 struct Header {
37 37 marker: [u8; V2_FORMAT_MARKER.len()],
38 38
39 39 /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
40 40 /// `parents` field being at this offset, immediately after `marker`.
41 41 parents: DirstateParents,
42 42
43 43 root: ChildNodes,
44 44 nodes_with_entry_count: Size,
45 45 nodes_with_copy_source_count: Size,
46 46
47 47 /// If non-zero, a hash of ignore files that were used for some previous
48 48 /// run of the `status` algorithm.
49 49 ///
50 50 /// We define:
51 51 ///
52 52 /// * "Root" ignore files are `.hgignore` at the root of the repository if
53 53 /// it exists, and files from `ui.ignore.*` config. This set of files is
54 54 /// then sorted by the string representation of their path.
55 55 /// * The "expanded contents" of an ignore files is the byte string made
56 56 /// by concatenating its contents with the "expanded contents" of other
57 57 /// files included with `include:` or `subinclude:` files, in inclusion
58 58 /// order. This definition is recursive, as included files can
59 59 /// themselves include more files.
60 60 ///
61 61 /// This hash is defined as the SHA-1 of the concatenation (in sorted
62 62 /// order) of the "expanded contents" of each "root" ignore file.
63 63 /// (Note that computing this does not require actually concatenating byte
64 64 /// strings into contiguous memory, instead SHA-1 hashing can be done
65 65 /// incrementally.)
66 66 ignore_patterns_hash: IgnorePatternsHash,
67 67 }
68 68
69 69 #[derive(BytesCast)]
70 70 #[repr(C)]
71 71 pub(super) struct Node {
72 72 full_path: PathSlice,
73 73
74 74 /// In bytes from `self.full_path.start`
75 75 base_name_start: Size,
76 76
77 77 copy_source: OptPathSlice,
78 78 children: ChildNodes,
79 79 pub(super) tracked_descendants_count: Size,
80 80
81 81 /// Dependending on the value of `state`:
82 82 ///
83 83 /// * A null byte: `data` is not used.
84 84 ///
85 85 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
86 86 /// represent a dirstate entry like in the v1 format.
87 87 ///
88 88 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
89 89 /// as the `Timestamp` for the mtime of a cached directory.
90 90 ///
91 91 /// The presence of this state means that at some point, this path in
92 92 /// the working directory was observed:
93 93 ///
94 94 /// - To be a directory
95 95 /// - With the modification time as given by `Timestamp`
96 96 /// - That timestamp was already strictly in the past when observed,
97 97 /// meaning that later changes cannot happen in the same clock tick
98 98 /// and must cause a different modification time (unless the system
99 99 /// clock jumps back and we get unlucky, which is not impossible but
100 100 /// but deemed unlikely enough).
101 /// - The directory did not contain any child entry that did not have a
102 /// corresponding dirstate node.
101 /// - All direct children of this directory (as returned by
102 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
103 /// are ignored by ignore patterns whose hash is in
104 /// `Header::ignore_patterns_hash`.
103 105 ///
104 106 /// This means that if `std::fs::symlink_metadata` later reports the
105 /// same modification time, we don’t need to call `std::fs::read_dir`
106 /// again for this directory and can iterate child dirstate nodes
107 /// instead.
107 /// same modification time and ignored patterns haven’t changed, a run
108 /// of status that is not listing ignored files can skip calling
109 /// `std::fs::read_dir` again for this directory, iterate child
110 /// dirstate nodes instead.
108 111 state: u8,
109 112 data: Entry,
110 113 }
111 114
112 115 #[derive(BytesCast, Copy, Clone)]
113 116 #[repr(C)]
114 117 struct Entry {
115 118 mode: I32Be,
116 119 mtime: I32Be,
117 120 size: I32Be,
118 121 }
119 122
120 123 /// Duration since the Unix epoch
121 124 #[derive(BytesCast, Copy, Clone, PartialEq)]
122 125 #[repr(C)]
123 126 pub(super) struct Timestamp {
124 127 seconds: I64Be,
125 128
126 129 /// In `0 .. 1_000_000_000`.
127 130 ///
128 131 /// This timestamp is later or earlier than `(seconds, 0)` by this many
129 132 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
130 133 nanoseconds: U32Be,
131 134 }
132 135
133 136 /// Counted in bytes from the start of the file
134 137 ///
135 138 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
136 139 /// we could save space by using `U32Be` instead.
137 140 type Offset = U64Be;
138 141
139 142 /// Counted in number of items
140 143 ///
141 144 /// NOTE: not supporting directories with more than 4 billion direct children,
142 145 /// or filenames more than 4 GiB.
143 146 type Size = U32Be;
144 147
145 148 /// Location of consecutive, fixed-size items.
146 149 ///
147 150 /// An item can be a single byte for paths, or a struct with
148 151 /// `derive(BytesCast)`.
149 152 #[derive(BytesCast, Copy, Clone)]
150 153 #[repr(C)]
151 154 struct Slice {
152 155 start: Offset,
153 156 len: Size,
154 157 }
155 158
156 159 /// A contiguous sequence of `len` times `Node`, representing the child nodes
157 160 /// of either some other node or of the repository root.
158 161 ///
159 162 /// Always sorted by ascending `full_path`, to allow binary search.
160 163 /// Since nodes with the same parent nodes also have the same parent path,
161 164 /// only the `base_name`s need to be compared during binary search.
162 165 type ChildNodes = Slice;
163 166
164 167 /// A `HgPath` of `len` bytes
165 168 type PathSlice = Slice;
166 169
167 170 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
168 171 type OptPathSlice = Slice;
169 172
170 173 /// Make sure that size-affecting changes are made knowingly
171 174 fn _static_assert_size_of() {
172 175 let _ = std::mem::transmute::<Header, [u8; 92]>;
173 176 let _ = std::mem::transmute::<Node, [u8; 57]>;
174 177 }
175 178
176 179 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
177 180 ///
178 181 /// This should only happen if Mercurial is buggy or a repository is corrupted.
179 182 #[derive(Debug)]
180 183 pub struct DirstateV2ParseError;
181 184
182 185 impl From<DirstateV2ParseError> for HgError {
183 186 fn from(_: DirstateV2ParseError) -> Self {
184 187 HgError::corrupted("dirstate-v2 parse error")
185 188 }
186 189 }
187 190
188 191 impl From<DirstateV2ParseError> for crate::DirstateError {
189 192 fn from(error: DirstateV2ParseError) -> Self {
190 193 HgError::from(error).into()
191 194 }
192 195 }
193 196
194 197 fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> {
195 198 let (header, _) =
196 199 Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
197 200 if header.marker == *V2_FORMAT_MARKER {
198 201 Ok(header)
199 202 } else {
200 203 Err(DirstateV2ParseError)
201 204 }
202 205 }
203 206
204 207 pub(super) fn read<'on_disk>(
205 208 on_disk: &'on_disk [u8],
206 209 ) -> Result<
207 210 (DirstateMap<'on_disk>, Option<DirstateParents>),
208 211 DirstateV2ParseError,
209 212 > {
210 213 if on_disk.is_empty() {
211 214 return Ok((DirstateMap::empty(on_disk), None));
212 215 }
213 216 let header = read_header(on_disk)?;
214 217 let dirstate_map = DirstateMap {
215 218 on_disk,
216 219 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
217 220 on_disk,
218 221 header.root,
219 222 )?),
220 223 nodes_with_entry_count: header.nodes_with_entry_count.get(),
221 224 nodes_with_copy_source_count: header
222 225 .nodes_with_copy_source_count
223 226 .get(),
224 227 ignore_patterns_hash: header.ignore_patterns_hash,
225 228 };
226 229 let parents = Some(header.parents.clone());
227 230 Ok((dirstate_map, parents))
228 231 }
229 232
230 233 impl Node {
231 234 pub(super) fn full_path<'on_disk>(
232 235 &self,
233 236 on_disk: &'on_disk [u8],
234 237 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
235 238 read_hg_path(on_disk, self.full_path)
236 239 }
237 240
238 241 pub(super) fn base_name_start<'on_disk>(
239 242 &self,
240 243 ) -> Result<usize, DirstateV2ParseError> {
241 244 let start = self.base_name_start.get();
242 245 if start < self.full_path.len.get() {
243 246 let start = usize::try_from(start)
244 247 // u32 -> usize, could only panic on a 16-bit CPU
245 248 .expect("dirstate-v2 base_name_start out of bounds");
246 249 Ok(start)
247 250 } else {
248 251 Err(DirstateV2ParseError)
249 252 }
250 253 }
251 254
252 255 pub(super) fn base_name<'on_disk>(
253 256 &self,
254 257 on_disk: &'on_disk [u8],
255 258 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
256 259 let full_path = self.full_path(on_disk)?;
257 260 let base_name_start = self.base_name_start()?;
258 261 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
259 262 }
260 263
261 264 pub(super) fn path<'on_disk>(
262 265 &self,
263 266 on_disk: &'on_disk [u8],
264 267 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
265 268 Ok(WithBasename::from_raw_parts(
266 269 Cow::Borrowed(self.full_path(on_disk)?),
267 270 self.base_name_start()?,
268 271 ))
269 272 }
270 273
271 274 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
272 275 self.copy_source.start.get() != 0
273 276 }
274 277
275 278 pub(super) fn copy_source<'on_disk>(
276 279 &self,
277 280 on_disk: &'on_disk [u8],
278 281 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
279 282 Ok(if self.has_copy_source() {
280 283 Some(read_hg_path(on_disk, self.copy_source)?)
281 284 } else {
282 285 None
283 286 })
284 287 }
285 288
286 289 pub(super) fn node_data(
287 290 &self,
288 291 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
289 292 let entry = |state| {
290 293 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
291 294 };
292 295
293 296 match self.state {
294 297 b'\0' => Ok(dirstate_map::NodeData::None),
295 298 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
296 299 mtime: *self.data.as_timestamp(),
297 300 }),
298 301 b'n' => Ok(entry(EntryState::Normal)),
299 302 b'a' => Ok(entry(EntryState::Added)),
300 303 b'r' => Ok(entry(EntryState::Removed)),
301 304 b'm' => Ok(entry(EntryState::Merged)),
302 305 _ => Err(DirstateV2ParseError),
303 306 }
304 307 }
305 308
306 309 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
307 310 if self.state == b'd' {
308 311 Some(self.data.as_timestamp())
309 312 } else {
310 313 None
311 314 }
312 315 }
313 316
314 317 pub(super) fn state(
315 318 &self,
316 319 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
317 320 match self.state {
318 321 b'\0' | b'd' => Ok(None),
319 322 b'n' => Ok(Some(EntryState::Normal)),
320 323 b'a' => Ok(Some(EntryState::Added)),
321 324 b'r' => Ok(Some(EntryState::Removed)),
322 325 b'm' => Ok(Some(EntryState::Merged)),
323 326 _ => Err(DirstateV2ParseError),
324 327 }
325 328 }
326 329
327 330 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
328 331 DirstateEntry {
329 332 state,
330 333 mode: self.data.mode.get(),
331 334 mtime: self.data.mtime.get(),
332 335 size: self.data.size.get(),
333 336 }
334 337 }
335 338
336 339 pub(super) fn entry(
337 340 &self,
338 341 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
339 342 Ok(self
340 343 .state()?
341 344 .map(|state| self.entry_with_given_state(state)))
342 345 }
343 346
344 347 pub(super) fn children<'on_disk>(
345 348 &self,
346 349 on_disk: &'on_disk [u8],
347 350 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
348 351 read_slice::<Node>(on_disk, self.children)
349 352 }
350 353
351 354 pub(super) fn to_in_memory_node<'on_disk>(
352 355 &self,
353 356 on_disk: &'on_disk [u8],
354 357 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
355 358 Ok(dirstate_map::Node {
356 359 children: dirstate_map::ChildNodes::OnDisk(
357 360 self.children(on_disk)?,
358 361 ),
359 362 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
360 363 data: self.node_data()?,
361 364 tracked_descendants_count: self.tracked_descendants_count.get(),
362 365 })
363 366 }
364 367 }
365 368
366 369 impl Entry {
367 370 fn from_timestamp(timestamp: Timestamp) -> Self {
368 371 // Safety: both types implement the `ByteCast` trait, so we could
369 372 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
370 373 // `transmute` instead makes the compiler check that the two types
371 374 // have the same size, which eliminates the error case of
372 375 // `from_bytes`.
373 376 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
374 377 }
375 378
376 379 fn as_timestamp(&self) -> &Timestamp {
377 380 // Safety: same as above in `from_timestamp`
378 381 unsafe { &*(self as *const Entry as *const Timestamp) }
379 382 }
380 383 }
381 384
382 385 impl Timestamp {
383 386 pub fn seconds(&self) -> i64 {
384 387 self.seconds.get()
385 388 }
386 389 }
387 390
388 391 impl From<SystemTime> for Timestamp {
389 392 fn from(system_time: SystemTime) -> Self {
390 393 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
391 394 Ok(duration) => {
392 395 (duration.as_secs() as i64, duration.subsec_nanos())
393 396 }
394 397 Err(error) => {
395 398 let negative = error.duration();
396 399 (-(negative.as_secs() as i64), negative.subsec_nanos())
397 400 }
398 401 };
399 402 Timestamp {
400 403 seconds: secs.into(),
401 404 nanoseconds: nanos.into(),
402 405 }
403 406 }
404 407 }
405 408
406 409 impl From<&'_ Timestamp> for SystemTime {
407 410 fn from(timestamp: &'_ Timestamp) -> Self {
408 411 let secs = timestamp.seconds.get();
409 412 let nanos = timestamp.nanoseconds.get();
410 413 if secs >= 0 {
411 414 UNIX_EPOCH + Duration::new(secs as u64, nanos)
412 415 } else {
413 416 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
414 417 }
415 418 }
416 419 }
417 420
418 421 fn read_hg_path(
419 422 on_disk: &[u8],
420 423 slice: Slice,
421 424 ) -> Result<&HgPath, DirstateV2ParseError> {
422 425 let bytes = read_slice::<u8>(on_disk, slice)?;
423 426 Ok(HgPath::new(bytes))
424 427 }
425 428
426 429 fn read_slice<T>(
427 430 on_disk: &[u8],
428 431 slice: Slice,
429 432 ) -> Result<&[T], DirstateV2ParseError>
430 433 where
431 434 T: BytesCast,
432 435 {
433 436 // Either `usize::MAX` would result in "out of bounds" error since a single
434 437 // `&[u8]` cannot occupy the entire addess space.
435 438 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
436 439 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
437 440 on_disk
438 441 .get(start..)
439 442 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
440 443 .map(|(slice, _rest)| slice)
441 444 .ok_or_else(|| DirstateV2ParseError)
442 445 }
443 446
444 447 pub(crate) fn parse_dirstate_parents(
445 448 on_disk: &[u8],
446 449 ) -> Result<&DirstateParents, HgError> {
447 450 Ok(&read_header(on_disk)?.parents)
448 451 }
449 452
450 453 pub(crate) fn for_each_tracked_path<'on_disk>(
451 454 on_disk: &'on_disk [u8],
452 455 mut f: impl FnMut(&'on_disk HgPath),
453 456 ) -> Result<(), DirstateV2ParseError> {
454 457 let header = read_header(on_disk)?;
455 458 fn recur<'on_disk>(
456 459 on_disk: &'on_disk [u8],
457 460 nodes: Slice,
458 461 f: &mut impl FnMut(&'on_disk HgPath),
459 462 ) -> Result<(), DirstateV2ParseError> {
460 463 for node in read_slice::<Node>(on_disk, nodes)? {
461 464 if let Some(state) = node.state()? {
462 465 if state.is_tracked() {
463 466 f(node.full_path(on_disk)?)
464 467 }
465 468 }
466 469 recur(on_disk, node.children, f)?
467 470 }
468 471 Ok(())
469 472 }
470 473 recur(on_disk, header.root, &mut f)
471 474 }
472 475
473 476 pub(super) fn write(
474 477 dirstate_map: &mut DirstateMap,
475 478 parents: DirstateParents,
476 479 ) -> Result<Vec<u8>, DirstateError> {
477 480 let header_len = std::mem::size_of::<Header>();
478 481
479 482 // This ignores the space for paths, and for nodes without an entry.
480 483 // TODO: better estimate? Skip the `Vec` and write to a file directly?
481 484 let size_guess = header_len
482 485 + std::mem::size_of::<Node>()
483 486 * dirstate_map.nodes_with_entry_count as usize;
484 487 let mut out = Vec::with_capacity(size_guess);
485 488
486 489 // Keep space for the header. We’ll fill it out at the end when we know the
487 490 // actual offset for the root nodes.
488 491 out.resize(header_len, 0_u8);
489 492
490 493 let root =
491 494 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
492 495
493 496 let header = Header {
494 497 marker: *V2_FORMAT_MARKER,
495 498 parents: parents,
496 499 root,
497 500 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
498 501 nodes_with_copy_source_count: dirstate_map
499 502 .nodes_with_copy_source_count
500 503 .into(),
501 504 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
502 505 };
503 506 out[..header_len].copy_from_slice(header.as_bytes());
504 507 Ok(out)
505 508 }
506 509
507 510 fn write_nodes(
508 511 dirstate_map: &DirstateMap,
509 512 nodes: dirstate_map::ChildNodesRef,
510 513 out: &mut Vec<u8>,
511 514 ) -> Result<ChildNodes, DirstateError> {
512 515 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
513 516 // order. Sort to enable binary search in the written file.
514 517 let nodes = nodes.sorted();
515 518
516 519 // First accumulate serialized nodes in a `Vec`
517 520 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
518 521 for node in nodes {
519 522 let children = write_nodes(
520 523 dirstate_map,
521 524 node.children(dirstate_map.on_disk)?,
522 525 out,
523 526 )?;
524 527 let full_path = node.full_path(dirstate_map.on_disk)?;
525 528 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
526 529 let copy_source =
527 530 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
528 531 write_slice::<u8>(source.as_bytes(), out)
529 532 } else {
530 533 Slice {
531 534 start: 0.into(),
532 535 len: 0.into(),
533 536 }
534 537 };
535 538 on_disk_nodes.push(match node {
536 539 NodeRef::InMemory(path, node) => {
537 540 let (state, data) = match &node.data {
538 541 dirstate_map::NodeData::Entry(entry) => (
539 542 entry.state.into(),
540 543 Entry {
541 544 mode: entry.mode.into(),
542 545 mtime: entry.mtime.into(),
543 546 size: entry.size.into(),
544 547 },
545 548 ),
546 549 dirstate_map::NodeData::CachedDirectory { mtime } => {
547 550 (b'd', Entry::from_timestamp(*mtime))
548 551 }
549 552 dirstate_map::NodeData::None => (
550 553 b'\0',
551 554 Entry {
552 555 mode: 0.into(),
553 556 mtime: 0.into(),
554 557 size: 0.into(),
555 558 },
556 559 ),
557 560 };
558 561 Node {
559 562 children,
560 563 copy_source,
561 564 full_path,
562 565 base_name_start: u32::try_from(path.base_name_start())
563 566 // Could only panic for paths over 4 GiB
564 567 .expect("dirstate-v2 offset overflow")
565 568 .into(),
566 569 tracked_descendants_count: node
567 570 .tracked_descendants_count
568 571 .into(),
569 572 state,
570 573 data,
571 574 }
572 575 }
573 576 NodeRef::OnDisk(node) => Node {
574 577 children,
575 578 copy_source,
576 579 full_path,
577 580 ..*node
578 581 },
579 582 })
580 583 }
581 584 // … so we can write them contiguously
582 585 Ok(write_slice::<Node>(&on_disk_nodes, out))
583 586 }
584 587
585 588 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
586 589 where
587 590 T: BytesCast,
588 591 {
589 592 let start = u64::try_from(out.len())
590 593 // Could only panic on a 128-bit CPU with a dirstate over 16 EiB
591 594 .expect("dirstate-v2 offset overflow")
592 595 .into();
593 596 let len = u32::try_from(slice.len())
594 597 // Could only panic for paths over 4 GiB or nodes with over 4 billions
595 598 // child nodes
596 599 .expect("dirstate-v2 offset overflow")
597 600 .into();
598 601 out.extend(slice.as_bytes());
599 602 Slice { start, len }
600 603 }
@@ -1,728 +1,758 b''
1 1 use crate::dirstate::status::IgnoreFnType;
2 2 use crate::dirstate_tree::dirstate_map::BorrowedPath;
3 3 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
4 4 use crate::dirstate_tree::dirstate_map::DirstateMap;
5 5 use crate::dirstate_tree::dirstate_map::NodeData;
6 6 use crate::dirstate_tree::dirstate_map::NodeRef;
7 7 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
8 8 use crate::dirstate_tree::on_disk::Timestamp;
9 9 use crate::matchers::get_ignore_function;
10 10 use crate::matchers::Matcher;
11 11 use crate::utils::files::get_bytes_from_os_string;
12 12 use crate::utils::files::get_path_from_bytes;
13 13 use crate::utils::hg_path::HgPath;
14 14 use crate::BadMatch;
15 15 use crate::DirstateStatus;
16 16 use crate::EntryState;
17 17 use crate::HgPathBuf;
18 18 use crate::PatternFileWarning;
19 19 use crate::StatusError;
20 20 use crate::StatusOptions;
21 21 use micro_timer::timed;
22 22 use rayon::prelude::*;
23 23 use sha1::{Digest, Sha1};
24 24 use std::borrow::Cow;
25 25 use std::io;
26 26 use std::path::Path;
27 27 use std::path::PathBuf;
28 28 use std::sync::Mutex;
29 29 use std::time::SystemTime;
30 30
31 31 /// Returns the status of the working directory compared to its parent
32 32 /// changeset.
33 33 ///
34 34 /// This algorithm is based on traversing the filesystem tree (`fs` in function
35 35 /// and variable names) and dirstate tree at the same time. The core of this
36 36 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
37 37 /// and its use of `itertools::merge_join_by`. When reaching a path that only
38 38 /// exists in one of the two trees, depending on information requested by
39 39 /// `options` we may need to traverse the remaining subtree.
40 40 #[timed]
41 41 pub fn status<'tree, 'on_disk: 'tree>(
42 42 dmap: &'tree mut DirstateMap<'on_disk>,
43 43 matcher: &(dyn Matcher + Sync),
44 44 root_dir: PathBuf,
45 45 ignore_files: Vec<PathBuf>,
46 46 options: StatusOptions,
47 47 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
48 48 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
49 49 if options.list_ignored || options.list_unknown {
50 50 let mut hasher = Sha1::new();
51 51 let (ignore_fn, warnings) = get_ignore_function(
52 52 ignore_files,
53 53 &root_dir,
54 54 &mut |pattern_bytes| hasher.update(pattern_bytes),
55 55 )?;
56 56 let new_hash = *hasher.finalize().as_ref();
57 57 let changed = new_hash != dmap.ignore_patterns_hash;
58 58 dmap.ignore_patterns_hash = new_hash;
59 59 (ignore_fn, warnings, Some(changed))
60 60 } else {
61 61 (Box::new(|&_| true), vec![], None)
62 62 };
63 63
64 64 let common = StatusCommon {
65 65 dmap,
66 66 options,
67 67 matcher,
68 68 ignore_fn,
69 69 outcome: Default::default(),
70 70 ignore_patterns_have_changed: patterns_changed,
71 71 new_cachable_directories: Default::default(),
72 72 outated_cached_directories: Default::default(),
73 73 filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
74 74 };
75 75 let is_at_repo_root = true;
76 76 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
77 77 let has_ignored_ancestor = false;
78 78 let root_cached_mtime = None;
79 79 let root_dir_metadata = None;
80 80 // If the path we have for the repository root is a symlink, do follow it.
81 81 // (As opposed to symlinks within the working directory which are not
82 82 // followed, using `std::fs::symlink_metadata`.)
83 83 common.traverse_fs_directory_and_dirstate(
84 84 has_ignored_ancestor,
85 85 dmap.root.as_ref(),
86 86 hg_path,
87 87 &root_dir,
88 88 root_dir_metadata,
89 89 root_cached_mtime,
90 90 is_at_repo_root,
91 91 )?;
92 92 let mut outcome = common.outcome.into_inner().unwrap();
93 93 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
94 94 let outdated = common.outated_cached_directories.into_inner().unwrap();
95 95
96 96 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
97 97 || !outdated.is_empty()
98 98 || !new_cachable.is_empty();
99 99
100 100 // Remove outdated mtimes before adding new mtimes, in case a given
101 101 // directory is both
102 102 for path in &outdated {
103 103 let node = dmap.get_or_insert(path)?;
104 104 if let NodeData::CachedDirectory { .. } = &node.data {
105 105 node.data = NodeData::None
106 106 }
107 107 }
108 108 for (path, mtime) in &new_cachable {
109 109 let node = dmap.get_or_insert(path)?;
110 110 match &node.data {
111 111 NodeData::Entry(_) => {} // Don’t overwrite an entry
112 112 NodeData::CachedDirectory { .. } | NodeData::None => {
113 113 node.data = NodeData::CachedDirectory { mtime: *mtime }
114 114 }
115 115 }
116 116 }
117 117
118 118 Ok((outcome, warnings))
119 119 }
120 120
121 121 /// Bag of random things needed by various parts of the algorithm. Reduces the
122 122 /// number of parameters passed to functions.
123 123 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
124 124 dmap: &'tree DirstateMap<'on_disk>,
125 125 options: StatusOptions,
126 126 matcher: &'a (dyn Matcher + Sync),
127 127 ignore_fn: IgnoreFnType<'a>,
128 128 outcome: Mutex<DirstateStatus<'on_disk>>,
129 129 new_cachable_directories: Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>,
130 130 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
131 131
132 132 /// Whether ignore files like `.hgignore` have changed since the previous
133 133 /// time a `status()` call wrote their hash to the dirstate. `None` means
134 134 /// we don’t know as this run doesn’t list either ignored or uknown files
135 135 /// and therefore isn’t reading `.hgignore`.
136 136 ignore_patterns_have_changed: Option<bool>,
137 137
138 138 /// The current time at the start of the `status()` algorithm, as measured
139 139 /// and possibly truncated by the filesystem.
140 140 filesystem_time_at_status_start: Option<SystemTime>,
141 141 }
142 142
143 143 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
144 144 fn read_dir(
145 145 &self,
146 146 hg_path: &HgPath,
147 147 fs_path: &Path,
148 148 is_at_repo_root: bool,
149 149 ) -> Result<Vec<DirEntry>, ()> {
150 150 DirEntry::read_dir(fs_path, is_at_repo_root)
151 151 .map_err(|error| self.io_error(error, hg_path))
152 152 }
153 153
154 154 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
155 155 let errno = error.raw_os_error().expect("expected real OS error");
156 156 self.outcome
157 157 .lock()
158 158 .unwrap()
159 159 .bad
160 160 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
161 161 }
162 162
163 163 fn check_for_outdated_directory_cache(
164 164 &self,
165 165 dirstate_node: &NodeRef<'tree, 'on_disk>,
166 166 ) -> Result<(), DirstateV2ParseError> {
167 167 if self.ignore_patterns_have_changed == Some(true)
168 168 && dirstate_node.cached_directory_mtime().is_some()
169 169 {
170 170 self.outated_cached_directories.lock().unwrap().push(
171 171 dirstate_node
172 172 .full_path_borrowed(self.dmap.on_disk)?
173 173 .detach_from_tree(),
174 174 )
175 175 }
176 176 Ok(())
177 177 }
178 178
179 179 /// If this returns true, we can get accurate results by only using
180 180 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
181 181 /// need to call `read_dir`.
182 182 fn can_skip_fs_readdir(
183 183 &self,
184 184 directory_metadata: Option<&std::fs::Metadata>,
185 185 cached_directory_mtime: Option<&Timestamp>,
186 186 ) -> bool {
187 187 if !self.options.list_unknown && !self.options.list_ignored {
188 188 // All states that we care about listing have corresponding
189 189 // dirstate entries.
190 190 // This happens for example with `hg status -mard`.
191 191 return true;
192 192 }
193 if !self.options.list_ignored
194 && self.ignore_patterns_have_changed == Some(false)
195 {
193 196 if let Some(cached_mtime) = cached_directory_mtime {
194 // The dirstate contains a cached mtime for this directory, set by
195 // a previous run of the `status` algorithm which found this
197 // The dirstate contains a cached mtime for this directory, set
198 // by a previous run of the `status` algorithm which found this
196 199 // directory eligible for `read_dir` caching.
197 200 if let Some(meta) = directory_metadata {
198 201 if let Ok(current_mtime) = meta.modified() {
199 202 if current_mtime == cached_mtime.into() {
200 // The mtime of that directory has not changed since
201 // then, which means that the
202 // results of `read_dir` should also
203 // be unchanged.
203 // The mtime of that directory has not changed
204 // since then, which means that the results of
205 // `read_dir` should also be unchanged.
204 206 return true;
205 207 }
206 208 }
207 209 }
208 210 }
211 }
209 212 false
210 213 }
211 214
212 /// Returns whether the filesystem directory was found to have any entry
213 /// that does not have a corresponding dirstate tree node.
215 /// Returns whether all child entries of the filesystem directory have a
216 /// corresponding dirstate node or are ignored.
// NOTE: the two summaries above are the removed and the added side of this
// change. On the added side `true` means "every entry is accounted for", and
// `false` is returned conservatively whenever `read_dir` was skipped.
214 217 fn traverse_fs_directory_and_dirstate(
215 218 &self,
216 219 has_ignored_ancestor: bool,
217 220 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
218 221 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
219 222 directory_fs_path: &Path,
220 223 directory_metadata: Option<&std::fs::Metadata>,
221 224 cached_directory_mtime: Option<&Timestamp>,
222 225 is_at_repo_root: bool,
223 226 ) -> Result<bool, DirstateV2ParseError> {
// Fast path: when `can_skip_fs_readdir` accepts the cached mtime, do not list
// the directory; instead stat each known dirstate child by its base name.
224 227 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
225 228 {
226 229 dirstate_nodes
227 230 .par_iter()
228 231 .map(|dirstate_node| {
229 232 let fs_path = directory_fs_path.join(get_path_from_bytes(
230 233 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
231 234 ));
232 235 match std::fs::symlink_metadata(&fs_path) {
233 236 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
234 237 &fs_path,
235 238 &fs_metadata,
236 239 dirstate_node,
237 240 has_ignored_ancestor,
238 241 ),
// The child no longer exists on the filesystem: treat the node as
// dirstate-only.
239 242 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
240 243 self.traverse_dirstate_only(dirstate_node)
241 244 }
// Any other I/O error is reported through `io_error` and does not abort
// the traversal.
242 245 Err(error) => {
243 246 let hg_path =
244 247 dirstate_node.full_path(self.dmap.on_disk)?;
245 248 Ok(self.io_error(error, hg_path))
246 249 }
247 250 }
248 251 })
249 252 .collect::<Result<_, _>>()?;
250 253
251 // Conservatively don’t let the caller assume that there aren’t
252 // any, since we don’t know.
253 let directory_has_any_fs_only_entry = true;
254 // We don’t know, so conservatively say this isn’t the case
255 let children_all_have_dirstate_node_or_are_ignored = false;
254 256
255 return Ok(directory_has_any_fs_only_entry);
257 return Ok(children_all_have_dirstate_node_or_are_ignored);
256 258 }
257 259
258 260 let mut fs_entries = if let Ok(entries) = self.read_dir(
259 261 directory_hg_path,
260 262 directory_fs_path,
261 263 is_at_repo_root,
262 264 ) {
263 265 entries
264 266 } else {
265 267 // Treat an unreadable directory (typically because of insufficient
266 268 // permissions) like an empty directory. `self.read_dir` has
267 269 // already called `self.io_error` so a warning will be emitted.
268 270 Vec::new()
269 271 };
270 272
271 273 // `merge_join_by` requires both its input iterators to be sorted:
272 274
273 275 let dirstate_nodes = dirstate_nodes.sorted();
274 276 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
275 277 // https://github.com/rust-lang/rust/issues/34162
276 278 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
277 279
278 280 // Propagate here any error that would happen inside the comparison
279 281 // callback below
280 282 for dirstate_node in &dirstate_nodes {
281 283 dirstate_node.base_name(self.dmap.on_disk)?;
282 284 }
283 285 itertools::merge_join_by(
284 286 dirstate_nodes,
285 287 &fs_entries,
286 288 |dirstate_node, fs_entry| {
287 289 // This `unwrap` never panics because we already propagated
288 290 // those errors above
289 291 dirstate_node
290 292 .base_name(self.dmap.on_disk)
291 293 .unwrap()
292 294 .cmp(&fs_entry.base_name)
293 295 },
294 296 )
295 297 .par_bridge()
296 298 .map(|pair| {
297 299 use itertools::EitherOrBoth::*;
298 let is_fs_only = pair.is_right();
299 300 let has_dirstate_node_or_is_ignored;
// `Both`: present on disk and in the dirstate. `Left`: dirstate only.
// `Right`: filesystem only; on the added side its flag is whatever
// `traverse_fs_only` returns.
300 301 match pair {
300 Both(dirstate_node, fs_entry) => self
301 .traverse_fs_and_dirstate(
302 Both(dirstate_node, fs_entry) => {
303 self.traverse_fs_and_dirstate(
302 304 &fs_entry.full_path,
303 305 &fs_entry.metadata,
304 306 dirstate_node,
305 307 has_ignored_ancestor,
306 )?,
308 )?;
309 has_dirstate_node_or_is_ignored = true
310 }
307 311 Left(dirstate_node) => {
308 self.traverse_dirstate_only(dirstate_node)?
312 self.traverse_dirstate_only(dirstate_node)?;
313 has_dirstate_node_or_is_ignored = true;
309 314 }
310 Right(fs_entry) => self.traverse_fs_only(
315 Right(fs_entry) => {
316 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
311 317 has_ignored_ancestor,
312 318 directory_hg_path,
313 319 fs_entry,
314 ),
320 )
321 }
315 322 }
316 Ok(is_fs_only)
323 Ok(has_dirstate_node_or_is_ignored)
317 324 })
// Removed side: OR over "is filesystem-only" starting from `false`.
// Added side: AND over "has a dirstate node or is ignored" starting from
// `true`, so an empty directory yields `true`.
318 .try_reduce(|| false, |a, b| Ok(a || b))
325 .try_reduce(|| true, |a, b| Ok(a && b))
319 326 }
320 327
/// Handles a path that exists on the filesystem and also has a dirstate node.
///
/// For a filesystem directory this recurses through
/// `traverse_fs_directory_and_dirstate` and then calls
/// `maybe_save_directory_mtime`. Otherwise a file or symlink accepted by the
/// matcher is classified from the node's entry state, and every child node is
/// handled as dirstate-only.
321 328 fn traverse_fs_and_dirstate(
322 329 &self,
323 330 fs_path: &Path,
324 331 fs_metadata: &std::fs::Metadata,
325 332 dirstate_node: NodeRef<'tree, 'on_disk>,
326 333 has_ignored_ancestor: bool,
327 334 ) -> Result<(), DirstateV2ParseError> {
328 335 self.check_for_outdated_directory_cache(&dirstate_node)?;
329 336 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
330 337 let file_type = fs_metadata.file_type();
331 338 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
332 339 if !file_or_symlink {
333 340 // If we previously had a file here, it was removed (with
334 341 // `hg rm` or similar) or deleted before it could be
335 342 // replaced by a directory or something else.
336 343 self.mark_removed_or_deleted_if_file(
337 344 &hg_path,
338 345 dirstate_node.state()?,
339 346 );
340 347 }
341 348 if file_type.is_dir() {
342 349 if self.options.collect_traversed_dirs {
343 350 self.outcome
344 351 .lock()
345 352 .unwrap()
346 353 .traversed
347 354 .push(hg_path.detach_from_tree())
348 355 }
349 356 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
350 357 let is_at_repo_root = false;
// The flag returned by the recursive traversal is passed straight on to
// `maybe_save_directory_mtime`.
351 let directory_has_any_fs_only_entry = self
358 let children_all_have_dirstate_node_or_are_ignored = self
352 359 .traverse_fs_directory_and_dirstate(
353 360 is_ignored,
354 361 dirstate_node.children(self.dmap.on_disk)?,
355 362 hg_path,
356 363 fs_path,
357 364 Some(fs_metadata),
358 365 dirstate_node.cached_directory_mtime(),
359 366 is_at_repo_root,
360 367 )?;
361 368 self.maybe_save_directory_mtime(
362 directory_has_any_fs_only_entry,
369 children_all_have_dirstate_node_or_are_ignored,
363 370 fs_metadata,
364 371 dirstate_node,
365 372 )?
366 373 } else {
367 374 if file_or_symlink && self.matcher.matches(hg_path) {
368 375 if let Some(state) = dirstate_node.state()? {
369 376 match state {
370 377 EntryState::Added => self
371 378 .outcome
372 379 .lock()
373 380 .unwrap()
374 381 .added
375 382 .push(hg_path.detach_from_tree()),
376 383 EntryState::Removed => self
377 384 .outcome
378 385 .lock()
379 386 .unwrap()
380 387 .removed
381 388 .push(hg_path.detach_from_tree()),
382 389 EntryState::Merged => self
383 390 .outcome
384 391 .lock()
385 392 .unwrap()
386 393 .modified
387 394 .push(hg_path.detach_from_tree()),
388 395 EntryState::Normal => self
389 396 .handle_normal_file(&dirstate_node, fs_metadata)?,
390 397 // This variant is not used in DirstateMap
391 398 // nodes
392 399 EntryState::Unknown => unreachable!(),
393 400 }
394 401 } else {
395 402 // `node.entry.is_none()` indicates a "directory"
396 403 // node, but the filesystem has a file
// On the added side `mark_unknown_or_ignored` returns a `bool`, which
// is deliberately discarded here by the trailing `;`.
397 self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path)
404 self.mark_unknown_or_ignored(
405 has_ignored_ancestor,
406 hg_path,
407 );
398 408 }
399 409 }
400 410
// The filesystem entry is not a directory, so any child nodes of this
// dirstate node have no filesystem counterpart.
401 411 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
402 412 {
403 413 self.traverse_dirstate_only(child_node)?
404 414 }
405 415 }
406 416 Ok(())
407 417 }
408 418
/// Queues the directory's mtime for caching when it is safe to do so.
///
/// A `(path, timestamp)` pair is pushed to `new_cachable_directories` only if
/// the flag from the directory traversal allows it, both the status start time
/// and the directory's modified time are available, the directory mtime is
/// strictly earlier than the status start, and the timestamp differs from the
/// one already cached on the node.
409 419 fn maybe_save_directory_mtime(
410 420 &self,
411 directory_has_any_fs_only_entry: bool,
421 children_all_have_dirstate_node_or_are_ignored: bool,
412 422 directory_metadata: &std::fs::Metadata,
413 423 dirstate_node: NodeRef<'tree, 'on_disk>,
414 424 ) -> Result<(), DirstateV2ParseError> {
415 if !directory_has_any_fs_only_entry {
425 if children_all_have_dirstate_node_or_are_ignored {
416 426 // All filesystem directory entries from `read_dir` have a
417 427 // corresponding node in the dirstate, so we can reconstitute the
418 428 // names of those entries without calling `read_dir` again.
419 429 if let (Some(status_start), Ok(directory_mtime)) = (
420 430 &self.filesystem_time_at_status_start,
421 431 directory_metadata.modified(),
422 432 ) {
423 433 // Although the Rust standard library’s `SystemTime` type
424 434 // has nanosecond precision, the times reported for a
425 435 // directory’s (or file’s) modified time may have lower
426 436 // resolution based on the filesystem (for example ext3
427 437 // only stores integer seconds), kernel (see
428 438 // https://stackoverflow.com/a/14393315/1162888), etc.
429 439 if &directory_mtime >= status_start {
430 440 // The directory was modified too recently, don’t cache its
431 441 // `read_dir` results.
432 442 //
433 443 // A timeline like this is possible:
434 444 //
435 445 // 1. A change to this directory (direct child was
436 446 // added or removed) causes its mtime to be set
437 447 // (possibly truncated) to `directory_mtime`
438 448 // 2. This `status` algorithm calls `read_dir`
439 449 // 3. Another change is made to the same directory,
440 450 // so that calling `read_dir` again would give
441 451 // different results, but soon enough after 1. that
442 452 // the mtime stays the same
443 453 //
444 454 // On a system where the time resolution is poor, this
445 455 // scenario is not unlikely if all three steps are caused
446 456 // by the same script.
447 457 } else {
448 458 // We’ve observed (through `status_start`) that time has
449 459 // “progressed” since `directory_mtime`, so any further
450 460 // change to this directory is extremely likely to cause a
451 461 // different mtime.
452 462 //
453 463 // Having the same mtime again is not entirely impossible
454 464 // since the system clock is not monotonic. It could jump
455 465 // backward to some point before `directory_mtime`, then a
456 466 // directory change could potentially happen during exactly
457 467 // the wrong tick.
458 468 //
459 469 // We deem this scenario (unlike the previous one) to be
460 470 // unlikely enough in practice.
461 471 let timestamp = directory_mtime.into();
462 472 let cached = dirstate_node.cached_directory_mtime();
// Only queue an update when the cached value would actually change.
463 473 if cached != Some(&timestamp) {
464 474 let hg_path = dirstate_node
465 475 .full_path_borrowed(self.dmap.on_disk)?
466 476 .detach_from_tree();
467 477 self.new_cachable_directories
468 478 .lock()
469 479 .unwrap()
470 480 .push((hg_path, timestamp))
471 481 }
472 482 }
473 483 }
474 484 }
475 485 Ok(())
476 486 }
477 487
478 488 /// A file with `EntryState::Normal` in the dirstate was found in the
479 489 /// filesystem
///
/// Pushes the path to exactly one of the `unsure`, `modified` or `clean`
/// outcome lists (`clean` only when `options.list_clean` is set), or to none.
///
/// Panics (via `expect`) if the node has no entry.
480 490 fn handle_normal_file(
481 491 &self,
482 492 dirstate_node: &NodeRef<'tree, 'on_disk>,
483 493 fs_metadata: &std::fs::Metadata,
484 494 ) -> Result<(), DirstateV2ParseError> {
485 495 // Keep the low 31 bits
486 496 fn truncate_u64(value: u64) -> i32 {
487 497 (value & 0x7FFF_FFFF) as i32
488 498 }
489 499 fn truncate_i64(value: i64) -> i32 {
490 500 (value & 0x7FFF_FFFF) as i32
491 501 }
492 502
493 503 let entry = dirstate_node
494 504 .entry()?
495 505 .expect("handle_normal_file called with entry-less node");
496 506 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
// Closure so that `entry.mode_changed` is only evaluated if it is reached
// in the `||` / `&&` chain below.
497 507 let mode_changed =
498 508 || self.options.check_exec && entry.mode_changed(fs_metadata);
499 509 let size_changed = entry.size != truncate_u64(fs_metadata.len());
500 510 if entry.size >= 0
501 511 && size_changed
502 512 && fs_metadata.file_type().is_symlink()
503 513 {
504 514 // issue6456: Size returned may be longer due to encryption
505 515 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
506 516 self.outcome
507 517 .lock()
508 518 .unwrap()
509 519 .unsure
510 520 .push(hg_path.detach_from_tree())
511 521 } else if dirstate_node.has_copy_source()
512 522 || entry.is_from_other_parent()
513 523 || (entry.size >= 0 && (size_changed || mode_changed()))
514 524 {
515 525 self.outcome
516 526 .lock()
517 527 .unwrap()
518 528 .modified
519 529 .push(hg_path.detach_from_tree())
520 530 } else {
// Size and mode give no verdict: compare mtimes. A mismatch, or an mtime
// equal to `options.last_normal_time`, makes the file "unsure".
521 531 let mtime = mtime_seconds(fs_metadata);
522 532 if truncate_i64(mtime) != entry.mtime
523 533 || mtime == self.options.last_normal_time
524 534 {
525 535 self.outcome
526 536 .lock()
527 537 .unwrap()
528 538 .unsure
529 539 .push(hg_path.detach_from_tree())
530 540 } else if self.options.list_clean {
531 541 self.outcome
532 542 .lock()
533 543 .unwrap()
534 544 .clean
535 545 .push(hg_path.detach_from_tree())
536 546 }
537 547 }
538 548 Ok(())
539 549 }
540 550
541 551 /// A node in the dirstate tree has no corresponding filesystem entry
///
/// Reports the node itself through `mark_removed_or_deleted_if_file`, then
/// applies the same treatment recursively to every child node (iterated
/// with `par_iter`), propagating the first error.
542 552 fn traverse_dirstate_only(
543 553 &self,
544 554 dirstate_node: NodeRef<'tree, 'on_disk>,
545 555 ) -> Result<(), DirstateV2ParseError> {
546 556 self.check_for_outdated_directory_cache(&dirstate_node)?;
547 557 self.mark_removed_or_deleted_if_file(
548 558 &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
549 559 dirstate_node.state()?,
550 560 );
551 561 dirstate_node
552 562 .children(self.dmap.on_disk)?
553 563 .par_iter()
554 564 .map(|child_node| self.traverse_dirstate_only(child_node))
555 565 .collect()
556 566 }
557 567
558 568 /// A node in the dirstate tree has no corresponding *file* on the
559 569 /// filesystem
560 570 ///
561 571 /// Does nothing on a "directory" node
///
/// A node whose state is `None` is skipped, as is any path the matcher
/// rejects. Otherwise the path goes to the `removed` outcome list when the
/// state is `EntryState::Removed`, and to `deleted` for every other state.
562 572 fn mark_removed_or_deleted_if_file(
563 573 &self,
564 574 hg_path: &BorrowedPath<'tree, 'on_disk>,
565 575 dirstate_node_state: Option<EntryState>,
566 576 ) {
567 577 if let Some(state) = dirstate_node_state {
568 578 if self.matcher.matches(hg_path) {
569 579 if let EntryState::Removed = state {
570 580 self.outcome
571 581 .lock()
572 582 .unwrap()
573 583 .removed
574 584 .push(hg_path.detach_from_tree())
575 585 } else {
576 586 self.outcome
577 587 .lock()
578 588 .unwrap()
579 589 .deleted
580 590 .push(hg_path.detach_from_tree())
581 591 }
582 592 }
583 593 }
584 594 }
585 595
586 596 /// Something in the filesystem has no corresponding dirstate node
597 ///
598 /// Returns whether that path is ignored
// Return value on the added side, by kind of filesystem entry:
// * directory: `is_ignored` for the directory itself (results of the
//   recursive calls on its children are discarded);
// * file or symlink accepted by the matcher: the result of
//   `mark_unknown_or_ignored`;
// * file or symlink rejected by the matcher: `false`;
// * anything else: `true`.
587 599 fn traverse_fs_only(
588 600 &self,
589 601 has_ignored_ancestor: bool,
590 602 directory_hg_path: &HgPath,
591 603 fs_entry: &DirEntry,
592 ) {
604 ) -> bool {
593 605 let hg_path = directory_hg_path.join(&fs_entry.base_name);
594 606 let file_type = fs_entry.metadata.file_type();
595 607 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
596 608 if file_type.is_dir() {
597 609 let is_ignored =
598 610 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
599 611 let traverse_children = if is_ignored {
600 612 // Descendants of an ignored directory are all ignored
601 613 self.options.list_ignored
602 614 } else {
603 615 // Descendants of an unknown directory may be either unknown or
604 616 // ignored
605 617 self.options.list_unknown || self.options.list_ignored
606 618 };
607 619 if traverse_children {
608 620 let is_at_repo_root = false;
// A failed `read_dir` is silently skipped here: the `Err` case has no
// branch.
609 621 if let Ok(children_fs_entries) = self.read_dir(
610 622 &hg_path,
611 623 &fs_entry.full_path,
612 624 is_at_repo_root,
613 625 ) {
614 626 children_fs_entries.par_iter().for_each(|child_fs_entry| {
615 627 self.traverse_fs_only(
616 628 is_ignored,
617 629 &hg_path,
618 630 child_fs_entry,
619 )
631 );
620 632 })
621 633 }
622 634 }
623 635 if self.options.collect_traversed_dirs {
624 636 self.outcome.lock().unwrap().traversed.push(hg_path.into())
625 637 }
626 } else if file_or_symlink && self.matcher.matches(&hg_path) {
638 is_ignored
639 } else {
640 if file_or_symlink {
641 if self.matcher.matches(&hg_path) {
627 642 self.mark_unknown_or_ignored(
628 643 has_ignored_ancestor,
629 644 &BorrowedPath::InMemory(&hg_path),
630 645 )
646 } else {
647 // We haven’t computed whether this path is ignored. It
648 // might not be, and a future run of status might have a
649 // different matcher that matches it. So treat it as not
650 // ignored. That is, inhibit readdir caching of the parent
651 // directory.
652 false
653 }
654 } else {
655 // This is neither a directory, a plain file, or a symlink.
656 // Treat it like an ignored file.
657 true
658 }
631 659 }
632 660 }
633 661
// Classifies a path that has no dirstate entry: it is ignored when it has an
// ignored ancestor or `ignore_fn` accepts it, and unknown otherwise. The path
// is pushed to the `ignored` / `unknown` outcome list only when the matching
// `list_ignored` / `list_unknown` option is set.
662 /// Returns whether that path is ignored
634 663 fn mark_unknown_or_ignored(
635 664 &self,
636 665 has_ignored_ancestor: bool,
637 666 hg_path: &BorrowedPath<'_, 'on_disk>,
638 ) {
667 ) -> bool {
639 668 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
640 669 if is_ignored {
641 670 if self.options.list_ignored {
642 671 self.outcome
643 672 .lock()
644 673 .unwrap()
645 674 .ignored
646 675 .push(hg_path.detach_from_tree())
647 676 }
648 677 } else {
649 678 if self.options.list_unknown {
650 679 self.outcome
651 680 .lock()
652 681 .unwrap()
653 682 .unknown
654 683 .push(hg_path.detach_from_tree())
655 684 }
656 685 }
// Added side only: the classification is returned even when nothing was
// pushed to the outcome lists.
686 is_ignored
657 687 }
658 688 }
659 689
// Returns the modification time of `metadata` in whole seconds, as reported
// by the Unix-only `MetadataExt::mtime`. No non-Unix variant is visible in
// this excerpt (see the TODO on the attribute below).
660 690 #[cfg(unix)] // TODO
661 691 fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
662 692 // Going through `Metadata::modified()` would be portable, but would take
663 693 // care to construct a `SystemTime` value with sub-second precision just
664 694 // for us to throw that away here.
665 695 use std::os::unix::fs::MetadataExt;
666 696 metadata.mtime()
667 697 }
668 698
/// One filesystem directory entry as collected by `DirEntry::read_dir`.
669 699 struct DirEntry {
// File name of the entry, converted from the OS string to bytes.
670 700 base_name: HgPathBuf,
// Path of the entry as returned by `std::fs::DirEntry::path`.
671 701 full_path: PathBuf,
// Metadata as returned by `std::fs::DirEntry::metadata`.
672 702 metadata: std::fs::Metadata,
673 703 }
674 704
675 705 impl DirEntry {
676 706 /// Returns **unsorted** entries in the given directory, with name and
677 707 /// metadata.
678 708 ///
679 709 /// If a `.hg` sub-directory is encountered:
680 710 ///
681 711 /// * At the repository root, ignore that sub-directory
682 712 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
683 713 /// list instead.
///
/// Any I/O error while opening the directory, reading an entry, or
/// reading an entry's metadata aborts the listing and is returned.
684 714 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
685 715 let mut results = Vec::new();
686 716 for entry in path.read_dir()? {
687 717 let entry = entry?;
688 718 let metadata = entry.metadata()?;
689 719 let name = get_bytes_from_os_string(entry.file_name());
690 720 // FIXME don't do this when cached
691 721 if name == b".hg" {
692 722 if is_at_repo_root {
693 723 // Skip the repo’s own .hg (might be a symlink)
694 724 continue;
695 725 } else if metadata.is_dir() {
696 726 // A .hg sub-directory at another location means a subrepo,
697 727 // skip it entirely.
698 728 return Ok(Vec::new());
699 729 }
// A non-directory `.hg` away from the root falls through and is
// listed like any other entry.
700 730 }
701 731 results.push(DirEntry {
702 732 base_name: name.into(),
703 733 full_path: entry.path(),
704 734 metadata,
705 735 })
706 736 }
707 737 Ok(results)
708 738 }
709 739 }
710 740
711 741 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
712 742 /// of the given repository.
713 743 ///
714 744 /// This is similar to `SystemTime::now()`, with the result truncated to the
715 745 /// same time resolution as other files’ modification times. Using `.hg`
716 746 /// instead of the system’s default temporary directory (such as `/tmp`) makes
717 747 /// it more likely the temporary file is in the same disk partition as contents
718 748 /// of the working directory, which can matter since different filesystems may
719 749 /// store timestamps with different resolutions.
720 750 ///
721 751 /// This may fail, typically if we lack write permissions. In that case we
722 752 /// should continue the `status()` algorithm anyway and consider the current
723 753 /// date/time to be unknown.
724 754 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
725 755 tempfile::tempfile_in(repo_root.join(".hg"))?
726 756 .metadata()?
727 757 .modified()
728 758 }
General Comments 0
You need to be logged in to leave comments. Login now