##// END OF EJS Templates
dirstate-v2: Only convert from SystemTime to Timestamp and not back...
Simon Sapin -
r49004:0cc0c097 default
parent child Browse files
Show More
@@ -1,733 +1,721 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
6 6 use crate::dirstate_tree::path_with_basename::WithBasename;
7 7 use crate::errors::HgError;
8 8 use crate::utils::hg_path::HgPath;
9 9 use crate::DirstateEntry;
10 10 use crate::DirstateError;
11 11 use crate::DirstateParents;
12 12 use bitflags::bitflags;
13 13 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
14 14 use bytes_cast::BytesCast;
15 15 use format_bytes::format_bytes;
16 16 use std::borrow::Cow;
17 17 use std::convert::{TryFrom, TryInto};
18 use std::time::{Duration, SystemTime, UNIX_EPOCH};
18 use std::time::{SystemTime, UNIX_EPOCH};
19 19
20 20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
21 21 /// This a redundant sanity check more than an actual "magic number" since
22 22 /// `.hg/requires` already governs which format should be used.
23 23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
24 24
25 25 /// Keep space for 256-bit hashes
26 26 const STORED_NODE_ID_BYTES: usize = 32;
27 27
28 28 /// … even though only 160 bits are used for now, with SHA-1
29 29 const USED_NODE_ID_BYTES: usize = 20;
30 30
31 31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33 33
34 34 /// Must match the constant of the same name in
35 35 /// `mercurial/dirstateutils/docket.py`
36 36 const TREE_METADATA_SIZE: usize = 44;
37 37
/// Make sure that size-affecting changes are made knowingly
#[allow(unused)]
fn static_assert_size_of() {
    // `transmute` only compiles when source and destination have the same
    // size, so each of these lines is a compile-time size assertion for an
    // on-disk struct. The function is never called.
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; 43]>;
}
45 45
// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    /// Expected to equal `V2_FORMAT_MARKER`; checked in `read_docket`.
    marker: [u8; V2_FORMAT_MARKER.len()],
    /// Only the first `USED_NODE_ID_BYTES` are meaningful for now.
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],

    /// Passed through opaquely via `Docket::tree_metadata`.
    metadata: TreeMetadata,

    /// Counted in bytes
    data_size: Size,

    /// Length of the UUID that follows this header in the docket file.
    uuid_size: u8,
}
61 61
/// A parsed `.hg/dirstate` docket file: the fixed-size header plus the
/// variable-length UUID that follows it.
pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    uuid: &'on_disk [u8],
}
66 66
/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
struct TreeMetadata {
    /// Location of the root nodes within the separate data file.
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,
    /// Bytes in the data file no longer referenced after appends.
    unreachable_bytes: Size,
    /// Zero padding; see `write`.
    unused: [u8; 4],

    /// See *Optional hash of ignore patterns* section of
    /// `mercurial/helptext/internals/dirstate-v2.txt`
    ignore_patterns_hash: IgnorePatternsHash,
}
82 82
/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    /// `start == 0` means no copy source (see `OptPathSlice`).
    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,
    flags: Flags,
    /// Interpreted according to `flags`; for directory nodes without an
    /// entry these bytes hold a `Timestamp` instead (see
    /// `Node::cached_directory_mtime`).
    data: Entry,
}
100 100
bitflags! {
    /// Per-node flags; see `Node::has_entry` and `Node::assume_entry`
    /// for how they are interpreted.
    #[derive(BytesCast)]
    #[repr(C)]
    struct Flags: u8 {
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        /// When set, `Entry::mode` and `Entry::size` are meaningful.
        const HAS_MODE_AND_SIZE = 1 << 3;
        /// With an entry: `Entry::mtime` is meaningful. Without one: the
        /// node's data bytes hold a cached directory `Timestamp`.
        const HAS_MTIME = 1 << 4;
    }
}
112 112
/// Raw entry data, interpreted according to the owning node's `Flags`.
///
/// Note: these 12 bytes are reinterpreted as a `Timestamp` for cached
/// directory mtimes (see `Entry::from_timestamp` / `Entry::as_timestamp`).
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct Entry {
    mode: I32Be,
    size: I32Be,
    mtime: I32Be,
}
120 120
/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone, PartialEq)]
#[repr(C)]
pub(super) struct Timestamp {
    /// Whole seconds; negative for times before the epoch.
    seconds: I64Be,

    /// In `0 .. 1_000_000_000`.
    ///
    /// This timestamp is later or earlier than `(seconds, 0)` by this many
    /// nanoseconds, if `seconds` is non-negative or negative, respectively.
    nanoseconds: U32Be,
}
133 133
/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;

/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
    /// Byte offset of the first `Node`, from the start of the file.
    start: Offset,
    /// Number of nodes.
    len: Size,
}

/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
    /// Byte offset of the path bytes, from the start of the file.
    start: Offset,
    /// Path length in bytes.
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;
172 172
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;

impl From<DirstateV2ParseError> for HgError {
    // Parse errors carry no detail, so the message is a fixed string.
    fn from(_: DirstateV2ParseError) -> Self {
        HgError::corrupted("dirstate-v2 parse error")
    }
}

impl From<DirstateV2ParseError> for crate::DirstateError {
    // Convenience conversion, going through `HgError` above.
    fn from(error: DirstateV2ParseError) -> Self {
        HgError::from(error).into()
    }
}
190 190
191 191 impl<'on_disk> Docket<'on_disk> {
192 192 pub fn parents(&self) -> DirstateParents {
193 193 use crate::Node;
194 194 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
195 195 .unwrap()
196 196 .clone();
197 197 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
198 198 .unwrap()
199 199 .clone();
200 200 DirstateParents { p1, p2 }
201 201 }
202 202
203 203 pub fn tree_metadata(&self) -> &[u8] {
204 204 self.header.metadata.as_bytes()
205 205 }
206 206
207 207 pub fn data_size(&self) -> usize {
208 208 // This `unwrap` could only panic on a 16-bit CPU
209 209 self.header.data_size.get().try_into().unwrap()
210 210 }
211 211
212 212 pub fn data_filename(&self) -> String {
213 213 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
214 214 }
215 215 }
216 216
217 217 pub fn read_docket(
218 218 on_disk: &[u8],
219 219 ) -> Result<Docket<'_>, DirstateV2ParseError> {
220 220 let (header, uuid) =
221 221 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
222 222 let uuid_size = header.uuid_size as usize;
223 223 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
224 224 Ok(Docket { header, uuid })
225 225 } else {
226 226 Err(DirstateV2ParseError)
227 227 }
228 228 }
229 229
230 230 pub(super) fn read<'on_disk>(
231 231 on_disk: &'on_disk [u8],
232 232 metadata: &[u8],
233 233 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
234 234 if on_disk.is_empty() {
235 235 return Ok(DirstateMap::empty(on_disk));
236 236 }
237 237 let (meta, _) = TreeMetadata::from_bytes(metadata)
238 238 .map_err(|_| DirstateV2ParseError)?;
239 239 let dirstate_map = DirstateMap {
240 240 on_disk,
241 241 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
242 242 on_disk,
243 243 meta.root_nodes,
244 244 )?),
245 245 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
246 246 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
247 247 ignore_patterns_hash: meta.ignore_patterns_hash,
248 248 unreachable_bytes: meta.unreachable_bytes.get(),
249 249 };
250 250 Ok(dirstate_map)
251 251 }
252 252
253 253 impl Node {
254 254 pub(super) fn full_path<'on_disk>(
255 255 &self,
256 256 on_disk: &'on_disk [u8],
257 257 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
258 258 read_hg_path(on_disk, self.full_path)
259 259 }
260 260
261 261 pub(super) fn base_name_start<'on_disk>(
262 262 &self,
263 263 ) -> Result<usize, DirstateV2ParseError> {
264 264 let start = self.base_name_start.get();
265 265 if start < self.full_path.len.get() {
266 266 let start = usize::try_from(start)
267 267 // u32 -> usize, could only panic on a 16-bit CPU
268 268 .expect("dirstate-v2 base_name_start out of bounds");
269 269 Ok(start)
270 270 } else {
271 271 Err(DirstateV2ParseError)
272 272 }
273 273 }
274 274
275 275 pub(super) fn base_name<'on_disk>(
276 276 &self,
277 277 on_disk: &'on_disk [u8],
278 278 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
279 279 let full_path = self.full_path(on_disk)?;
280 280 let base_name_start = self.base_name_start()?;
281 281 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
282 282 }
283 283
284 284 pub(super) fn path<'on_disk>(
285 285 &self,
286 286 on_disk: &'on_disk [u8],
287 287 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
288 288 Ok(WithBasename::from_raw_parts(
289 289 Cow::Borrowed(self.full_path(on_disk)?),
290 290 self.base_name_start()?,
291 291 ))
292 292 }
293 293
294 294 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
295 295 self.copy_source.start.get() != 0
296 296 }
297 297
298 298 pub(super) fn copy_source<'on_disk>(
299 299 &self,
300 300 on_disk: &'on_disk [u8],
301 301 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
302 302 Ok(if self.has_copy_source() {
303 303 Some(read_hg_path(on_disk, self.copy_source)?)
304 304 } else {
305 305 None
306 306 })
307 307 }
308 308
309 309 fn has_entry(&self) -> bool {
310 310 self.flags.intersects(
311 311 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
312 312 )
313 313 }
314 314
315 315 pub(super) fn node_data(
316 316 &self,
317 317 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
318 318 if self.has_entry() {
319 319 Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
320 320 } else if let Some(&mtime) = self.cached_directory_mtime() {
321 321 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
322 322 } else {
323 323 Ok(dirstate_map::NodeData::None)
324 324 }
325 325 }
326 326
327 327 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
328 328 if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {
329 329 Some(self.data.as_timestamp())
330 330 } else {
331 331 None
332 332 }
333 333 }
334 334
335 335 fn assume_entry(&self) -> DirstateEntry {
336 336 // TODO: convert through raw bits instead?
337 337 let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
338 338 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
339 339 let p2_info = self.flags.contains(Flags::P2_INFO);
340 340 let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {
341 341 Some((self.data.mode.into(), self.data.size.into()))
342 342 } else {
343 343 None
344 344 };
345 345 let mtime = if self.flags.contains(Flags::HAS_MTIME) {
346 346 Some(self.data.mtime.into())
347 347 } else {
348 348 None
349 349 };
350 350 DirstateEntry::from_v2_data(
351 351 wdir_tracked,
352 352 p1_tracked,
353 353 p2_info,
354 354 mode_size,
355 355 mtime,
356 356 )
357 357 }
358 358
359 359 pub(super) fn entry(
360 360 &self,
361 361 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
362 362 if self.has_entry() {
363 363 Ok(Some(self.assume_entry()))
364 364 } else {
365 365 Ok(None)
366 366 }
367 367 }
368 368
369 369 pub(super) fn children<'on_disk>(
370 370 &self,
371 371 on_disk: &'on_disk [u8],
372 372 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
373 373 read_nodes(on_disk, self.children)
374 374 }
375 375
376 376 pub(super) fn to_in_memory_node<'on_disk>(
377 377 &self,
378 378 on_disk: &'on_disk [u8],
379 379 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
380 380 Ok(dirstate_map::Node {
381 381 children: dirstate_map::ChildNodes::OnDisk(
382 382 self.children(on_disk)?,
383 383 ),
384 384 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
385 385 data: self.node_data()?,
386 386 descendants_with_entry_count: self
387 387 .descendants_with_entry_count
388 388 .get(),
389 389 tracked_descendants_count: self.tracked_descendants_count.get(),
390 390 })
391 391 }
392 392 }
393 393
impl Entry {
    /// Encodes a `DirstateEntry` into the on-disk `(flags, raw entry)`
    /// pair. Absent fields (mode/size, mtime) are written as zero with the
    /// corresponding `Flags` bit left unset, so readers know to ignore
    /// them.
    fn from_dirstate_entry(entry: &DirstateEntry) -> (Flags, Self) {
        let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
            entry.v2_data();
        // TODO: convert through raw flag bits instead?
        let mut flags = Flags::empty();
        flags.set(Flags::WDIR_TRACKED, wdir_tracked);
        flags.set(Flags::P1_TRACKED, p1_tracked);
        flags.set(Flags::P2_INFO, p2_info);
        let (mode, size, mtime);
        if let Some((m, s)) = mode_size_opt {
            mode = m;
            size = s;
            flags.insert(Flags::HAS_MODE_AND_SIZE)
        } else {
            mode = 0;
            size = 0;
        }
        if let Some(m) = mtime_opt {
            mtime = m;
            flags.insert(Flags::HAS_MTIME);
        } else {
            mtime = 0;
        }
        let raw_entry = Entry {
            mode: mode.into(),
            size: size.into(),
            mtime: mtime.into(),
        };
        (flags, raw_entry)
    }

    /// Reinterprets a cached directory mtime as raw entry bytes, for
    /// storage in `Node::data`.
    fn from_timestamp(timestamp: Timestamp) -> Self {
        // Safety: both types implement the `ByteCast` trait, so we could
        // safely use `as_bytes` and `from_bytes` to do this conversion. Using
        // `transmute` instead makes the compiler check that the two types
        // have the same size, which eliminates the error case of
        // `from_bytes`.
        unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
    }

    /// Inverse of `from_timestamp`: view these entry bytes as a
    /// `Timestamp`. Only meaningful when `Flags::HAS_MTIME` is set on a
    /// node without an entry (see `Node::cached_directory_mtime`).
    fn as_timestamp(&self) -> &Timestamp {
        // Safety: same as above in `from_timestamp`
        unsafe { &*(self as *const Entry as *const Timestamp) }
    }
}
440 440
impl Timestamp {
    /// Whole seconds relative to the Unix epoch (negative for earlier).
    pub fn seconds(&self) -> i64 {
        self.seconds.get()
    }
}
446 446
impl From<SystemTime> for Timestamp {
    fn from(system_time: SystemTime) -> Self {
        // `duration_since` errs when `system_time` is before the epoch; the
        // error then carries the (positive) duration back to the epoch,
        // which we store with negated seconds.
        let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
            Ok(duration) => {
                (duration.as_secs() as i64, duration.subsec_nanos())
            }
            Err(error) => {
                let negative = error.duration();
                (-(negative.as_secs() as i64), negative.subsec_nanos())
            }
        };
        // NOTE(review): for times less than one second *before* the epoch,
        // `secs` is 0 (non-negative) while the nanoseconds were meant to be
        // subtracted — the sign convention documented on `Timestamp` cannot
        // represent that case. Presumably irrelevant for real file mtimes;
        // confirm if pre-epoch timestamps ever matter here.
        Timestamp {
            seconds: secs.into(),
            nanoseconds: nanos.into(),
        }
    }
}
464 464
465 impl From<&'_ Timestamp> for SystemTime {
466 fn from(timestamp: &'_ Timestamp) -> Self {
467 let secs = timestamp.seconds.get();
468 let nanos = timestamp.nanoseconds.get();
469 if secs >= 0 {
470 UNIX_EPOCH + Duration::new(secs as u64, nanos)
471 } else {
472 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
473 }
474 }
475 }
476
477 465 fn read_hg_path(
478 466 on_disk: &[u8],
479 467 slice: PathSlice,
480 468 ) -> Result<&HgPath, DirstateV2ParseError> {
481 469 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
482 470 }
483 471
484 472 fn read_nodes(
485 473 on_disk: &[u8],
486 474 slice: ChildNodes,
487 475 ) -> Result<&[Node], DirstateV2ParseError> {
488 476 read_slice(on_disk, slice.start, slice.len.get())
489 477 }
490 478
491 479 fn read_slice<T, Len>(
492 480 on_disk: &[u8],
493 481 start: Offset,
494 482 len: Len,
495 483 ) -> Result<&[T], DirstateV2ParseError>
496 484 where
497 485 T: BytesCast,
498 486 Len: TryInto<usize>,
499 487 {
500 488 // Either `usize::MAX` would result in "out of bounds" error since a single
501 489 // `&[u8]` cannot occupy the entire addess space.
502 490 let start = start.get().try_into().unwrap_or(std::usize::MAX);
503 491 let len = len.try_into().unwrap_or(std::usize::MAX);
504 492 on_disk
505 493 .get(start..)
506 494 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
507 495 .map(|(slice, _rest)| slice)
508 496 .ok_or_else(|| DirstateV2ParseError)
509 497 }
510 498
/// Calls `f` for every tracked path in the on-disk dirstate, walking the
/// tree depth-first without building a full `DirstateMap`.
pub(crate) fn for_each_tracked_path<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
    mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    // Recursive walk over on-disk nodes; a parent is visited before its
    // children.
    fn recur<'on_disk>(
        on_disk: &'on_disk [u8],
        nodes: ChildNodes,
        f: &mut impl FnMut(&'on_disk HgPath),
    ) -> Result<(), DirstateV2ParseError> {
        for node in read_nodes(on_disk, nodes)? {
            if let Some(entry) = node.entry()? {
                if entry.state().is_tracked() {
                    f(node.full_path(on_disk)?)
                }
            }
            recur(on_disk, node.children, f)?
        }
        Ok(())
    }
    recur(on_disk, meta.root_nodes, &mut f)
}
535 523
/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false).
pub(super) fn write(
    dirstate_map: &mut DirstateMap,
    can_append: bool,
) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
    let append = can_append && dirstate_map.write_should_append();

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Node>()
        * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    // Serializes the whole tree; children are written before the node
    // arrays that refer to them (see `Writer::write_nodes`).
    let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

    let meta = TreeMetadata {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        unreachable_bytes: dirstate_map.unreachable_bytes.into(),
        unused: [0; 4],
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    Ok((writer.out, meta.as_bytes().to_vec(), append))
}
571 559
/// Serialization state for `write`: the map being written, whether output
/// is appended to the existing data file, and the accumulated output bytes.
struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    append: bool,
    out: Vec<u8>,
}
577 565
impl Writer<'_, '_> {
    /// Serializes `nodes` and, recursively, their descendants, returning
    /// the location of the resulting contiguous `Node` array.
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            // Recurse first: children and paths must already be in the
            // output before the array that refers to them is appended.
            let children =
                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                // `start == 0` is the "no copy source" marker
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (flags, data) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => {
                            Entry::from_dirstate_entry(entry)
                        }
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            (Flags::HAS_MTIME, Entry::from_timestamp(*mtime))
                        }
                        dirstate_map::NodeData::None => (
                            Flags::empty(),
                            Entry {
                                mode: 0.into(),
                                size: 0.into(),
                                mtime: 0.into(),
                            },
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        flags,
                        data,
                    }
                }
                // Unmodified on-disk nodes: only the re-written slices
                // change, everything else is copied verbatim.
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        // Inclusive so a zero-length slice at the very end still matches.
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

    /// File offset at which the next write to `self.out` will land,
    /// accounting for the existing data file's length when appending.
    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    /// Writes `slice` to the output and returns its location — or, when
    /// appending and `slice` already lives inside `on_disk`, just points
    /// at the existing copy.
    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}
713 701
714 702 fn offset_from_usize(x: usize) -> Offset {
715 703 u32::try_from(x)
716 704 // Could only panic for a dirstate file larger than 4 GiB
717 705 .expect("dirstate-v2 offset overflow")
718 706 .into()
719 707 }
720 708
721 709 fn child_nodes_len_from_usize(x: usize) -> Size {
722 710 u32::try_from(x)
723 711 // Could only panic with over 4 billion nodes
724 712 .expect("dirstate-v2 slice length overflow")
725 713 .into()
726 714 }
727 715
728 716 fn path_len_from_usize(x: usize) -> PathSize {
729 717 u16::try_from(x)
730 718 // Could only panic for paths over 64 KiB
731 719 .expect("dirstate-v2 path length overflow")
732 720 .into()
733 721 }
@@ -1,753 +1,754 b''
1 1 use crate::dirstate::status::IgnoreFnType;
2 2 use crate::dirstate_tree::dirstate_map::BorrowedPath;
3 3 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
4 4 use crate::dirstate_tree::dirstate_map::DirstateMap;
5 5 use crate::dirstate_tree::dirstate_map::NodeData;
6 6 use crate::dirstate_tree::dirstate_map::NodeRef;
7 7 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
8 8 use crate::dirstate_tree::on_disk::Timestamp;
9 9 use crate::matchers::get_ignore_function;
10 10 use crate::matchers::Matcher;
11 11 use crate::utils::files::get_bytes_from_os_string;
12 12 use crate::utils::files::get_path_from_bytes;
13 13 use crate::utils::hg_path::HgPath;
14 14 use crate::BadMatch;
15 15 use crate::DirstateStatus;
16 16 use crate::EntryState;
17 17 use crate::HgPathBuf;
18 18 use crate::PatternFileWarning;
19 19 use crate::StatusError;
20 20 use crate::StatusOptions;
21 21 use micro_timer::timed;
22 22 use rayon::prelude::*;
23 23 use sha1::{Digest, Sha1};
24 24 use std::borrow::Cow;
25 25 use std::io;
26 26 use std::path::Path;
27 27 use std::path::PathBuf;
28 28 use std::sync::Mutex;
29 29 use std::time::SystemTime;
30 30
31 31 /// Returns the status of the working directory compared to its parent
32 32 /// changeset.
33 33 ///
34 34 /// This algorithm is based on traversing the filesystem tree (`fs` in function
35 35 /// and variable names) and dirstate tree at the same time. The core of this
36 36 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
37 37 /// and its use of `itertools::merge_join_by`. When reaching a path that only
38 38 /// exists in one of the two trees, depending on information requested by
39 39 /// `options` we may need to traverse the remaining subtree.
40 40 #[timed]
41 41 pub fn status<'tree, 'on_disk: 'tree>(
42 42 dmap: &'tree mut DirstateMap<'on_disk>,
43 43 matcher: &(dyn Matcher + Sync),
44 44 root_dir: PathBuf,
45 45 ignore_files: Vec<PathBuf>,
46 46 options: StatusOptions,
47 47 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
48 48 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
49 49 if options.list_ignored || options.list_unknown {
50 50 let mut hasher = Sha1::new();
51 51 let (ignore_fn, warnings) = get_ignore_function(
52 52 ignore_files,
53 53 &root_dir,
54 54 &mut |pattern_bytes| hasher.update(pattern_bytes),
55 55 )?;
56 56 let new_hash = *hasher.finalize().as_ref();
57 57 let changed = new_hash != dmap.ignore_patterns_hash;
58 58 dmap.ignore_patterns_hash = new_hash;
59 59 (ignore_fn, warnings, Some(changed))
60 60 } else {
61 61 (Box::new(|&_| true), vec![], None)
62 62 };
63 63
64 64 let common = StatusCommon {
65 65 dmap,
66 66 options,
67 67 matcher,
68 68 ignore_fn,
69 69 outcome: Default::default(),
70 70 ignore_patterns_have_changed: patterns_changed,
71 71 new_cachable_directories: Default::default(),
72 72 outated_cached_directories: Default::default(),
73 73 filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
74 74 };
75 75 let is_at_repo_root = true;
76 76 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
77 77 let has_ignored_ancestor = false;
78 78 let root_cached_mtime = None;
79 79 let root_dir_metadata = None;
80 80 // If the path we have for the repository root is a symlink, do follow it.
81 81 // (As opposed to symlinks within the working directory which are not
82 82 // followed, using `std::fs::symlink_metadata`.)
83 83 common.traverse_fs_directory_and_dirstate(
84 84 has_ignored_ancestor,
85 85 dmap.root.as_ref(),
86 86 hg_path,
87 87 &root_dir,
88 88 root_dir_metadata,
89 89 root_cached_mtime,
90 90 is_at_repo_root,
91 91 )?;
92 92 let mut outcome = common.outcome.into_inner().unwrap();
93 93 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
94 94 let outdated = common.outated_cached_directories.into_inner().unwrap();
95 95
96 96 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
97 97 || !outdated.is_empty()
98 98 || !new_cachable.is_empty();
99 99
100 100 // Remove outdated mtimes before adding new mtimes, in case a given
101 101 // directory is both
102 102 for path in &outdated {
103 103 let node = dmap.get_or_insert(path)?;
104 104 if let NodeData::CachedDirectory { .. } = &node.data {
105 105 node.data = NodeData::None
106 106 }
107 107 }
108 108 for (path, mtime) in &new_cachable {
109 109 let node = dmap.get_or_insert(path)?;
110 110 match &node.data {
111 111 NodeData::Entry(_) => {} // Don’t overwrite an entry
112 112 NodeData::CachedDirectory { .. } | NodeData::None => {
113 113 node.data = NodeData::CachedDirectory { mtime: *mtime }
114 114 }
115 115 }
116 116 }
117 117
118 118 Ok((outcome, warnings))
119 119 }
120 120
121 121 /// Bag of random things needed by various parts of the algorithm. Reduces the
122 122 /// number of parameters passed to functions.
123 123 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
124 124 dmap: &'tree DirstateMap<'on_disk>,
125 125 options: StatusOptions,
126 126 matcher: &'a (dyn Matcher + Sync),
127 127 ignore_fn: IgnoreFnType<'a>,
128 128 outcome: Mutex<DirstateStatus<'on_disk>>,
129 129 new_cachable_directories: Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>,
130 130 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
131 131
132 132 /// Whether ignore files like `.hgignore` have changed since the previous
133 133 /// time a `status()` call wrote their hash to the dirstate. `None` means
134 134 /// we don’t know as this run doesn’t list either ignored or uknown files
135 135 /// and therefore isn’t reading `.hgignore`.
136 136 ignore_patterns_have_changed: Option<bool>,
137 137
138 138 /// The current time at the start of the `status()` algorithm, as measured
139 139 /// and possibly truncated by the filesystem.
140 140 filesystem_time_at_status_start: Option<SystemTime>,
141 141 }
142 142
143 143 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
144 144 fn read_dir(
145 145 &self,
146 146 hg_path: &HgPath,
147 147 fs_path: &Path,
148 148 is_at_repo_root: bool,
149 149 ) -> Result<Vec<DirEntry>, ()> {
150 150 DirEntry::read_dir(fs_path, is_at_repo_root)
151 151 .map_err(|error| self.io_error(error, hg_path))
152 152 }
153 153
154 154 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
155 155 let errno = error.raw_os_error().expect("expected real OS error");
156 156 self.outcome
157 157 .lock()
158 158 .unwrap()
159 159 .bad
160 160 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
161 161 }
162 162
163 163 fn check_for_outdated_directory_cache(
164 164 &self,
165 165 dirstate_node: &NodeRef<'tree, 'on_disk>,
166 166 ) -> Result<(), DirstateV2ParseError> {
167 167 if self.ignore_patterns_have_changed == Some(true)
168 168 && dirstate_node.cached_directory_mtime().is_some()
169 169 {
170 170 self.outated_cached_directories.lock().unwrap().push(
171 171 dirstate_node
172 172 .full_path_borrowed(self.dmap.on_disk)?
173 173 .detach_from_tree(),
174 174 )
175 175 }
176 176 Ok(())
177 177 }
178 178
179 179 /// If this returns true, we can get accurate results by only using
180 180 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
181 181 /// need to call `read_dir`.
182 182 fn can_skip_fs_readdir(
183 183 &self,
184 184 directory_metadata: Option<&std::fs::Metadata>,
185 185 cached_directory_mtime: Option<&Timestamp>,
186 186 ) -> bool {
187 187 if !self.options.list_unknown && !self.options.list_ignored {
188 188 // All states that we care about listing have corresponding
189 189 // dirstate entries.
190 190 // This happens for example with `hg status -mard`.
191 191 return true;
192 192 }
193 193 if !self.options.list_ignored
194 194 && self.ignore_patterns_have_changed == Some(false)
195 195 {
196 196 if let Some(cached_mtime) = cached_directory_mtime {
197 197 // The dirstate contains a cached mtime for this directory, set
198 198 // by a previous run of the `status` algorithm which found this
199 199 // directory eligible for `read_dir` caching.
200 200 if let Some(meta) = directory_metadata {
201 201 if let Ok(current_mtime) = meta.modified() {
202 if current_mtime == cached_mtime.into() {
202 let current_mtime = Timestamp::from(current_mtime);
203 if current_mtime == *cached_mtime {
203 204 // The mtime of that directory has not changed
204 205 // since then, which means that the results of
205 206 // `read_dir` should also be unchanged.
206 207 return true;
207 208 }
208 209 }
209 210 }
210 211 }
211 212 }
212 213 false
213 214 }
214 215
215 216 /// Returns whether all child entries of the filesystem directory have a
216 217 /// corresponding dirstate node or are ignored.
217 218 fn traverse_fs_directory_and_dirstate(
218 219 &self,
219 220 has_ignored_ancestor: bool,
220 221 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
221 222 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
222 223 directory_fs_path: &Path,
223 224 directory_metadata: Option<&std::fs::Metadata>,
224 225 cached_directory_mtime: Option<&Timestamp>,
225 226 is_at_repo_root: bool,
226 227 ) -> Result<bool, DirstateV2ParseError> {
227 228 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
228 229 {
229 230 dirstate_nodes
230 231 .par_iter()
231 232 .map(|dirstate_node| {
232 233 let fs_path = directory_fs_path.join(get_path_from_bytes(
233 234 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
234 235 ));
235 236 match std::fs::symlink_metadata(&fs_path) {
236 237 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
237 238 &fs_path,
238 239 &fs_metadata,
239 240 dirstate_node,
240 241 has_ignored_ancestor,
241 242 ),
242 243 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
243 244 self.traverse_dirstate_only(dirstate_node)
244 245 }
245 246 Err(error) => {
246 247 let hg_path =
247 248 dirstate_node.full_path(self.dmap.on_disk)?;
248 249 Ok(self.io_error(error, hg_path))
249 250 }
250 251 }
251 252 })
252 253 .collect::<Result<_, _>>()?;
253 254
254 255 // We don’t know, so conservatively say this isn’t the case
255 256 let children_all_have_dirstate_node_or_are_ignored = false;
256 257
257 258 return Ok(children_all_have_dirstate_node_or_are_ignored);
258 259 }
259 260
260 261 let mut fs_entries = if let Ok(entries) = self.read_dir(
261 262 directory_hg_path,
262 263 directory_fs_path,
263 264 is_at_repo_root,
264 265 ) {
265 266 entries
266 267 } else {
267 268 // Treat an unreadable directory (typically because of insufficient
268 269 // permissions) like an empty directory. `self.read_dir` has
269 270 // already called `self.io_error` so a warning will be emitted.
270 271 Vec::new()
271 272 };
272 273
273 274 // `merge_join_by` requires both its input iterators to be sorted:
274 275
275 276 let dirstate_nodes = dirstate_nodes.sorted();
276 277 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
277 278 // https://github.com/rust-lang/rust/issues/34162
278 279 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
279 280
280 281 // Propagate here any error that would happen inside the comparison
281 282 // callback below
282 283 for dirstate_node in &dirstate_nodes {
283 284 dirstate_node.base_name(self.dmap.on_disk)?;
284 285 }
285 286 itertools::merge_join_by(
286 287 dirstate_nodes,
287 288 &fs_entries,
288 289 |dirstate_node, fs_entry| {
289 290 // This `unwrap` never panics because we already propagated
290 291 // those errors above
291 292 dirstate_node
292 293 .base_name(self.dmap.on_disk)
293 294 .unwrap()
294 295 .cmp(&fs_entry.base_name)
295 296 },
296 297 )
297 298 .par_bridge()
298 299 .map(|pair| {
299 300 use itertools::EitherOrBoth::*;
300 301 let has_dirstate_node_or_is_ignored;
301 302 match pair {
302 303 Both(dirstate_node, fs_entry) => {
303 304 self.traverse_fs_and_dirstate(
304 305 &fs_entry.full_path,
305 306 &fs_entry.metadata,
306 307 dirstate_node,
307 308 has_ignored_ancestor,
308 309 )?;
309 310 has_dirstate_node_or_is_ignored = true
310 311 }
311 312 Left(dirstate_node) => {
312 313 self.traverse_dirstate_only(dirstate_node)?;
313 314 has_dirstate_node_or_is_ignored = true;
314 315 }
315 316 Right(fs_entry) => {
316 317 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
317 318 has_ignored_ancestor,
318 319 directory_hg_path,
319 320 fs_entry,
320 321 )
321 322 }
322 323 }
323 324 Ok(has_dirstate_node_or_is_ignored)
324 325 })
325 326 .try_reduce(|| true, |a, b| Ok(a && b))
326 327 }
327 328
328 329 fn traverse_fs_and_dirstate(
329 330 &self,
330 331 fs_path: &Path,
331 332 fs_metadata: &std::fs::Metadata,
332 333 dirstate_node: NodeRef<'tree, 'on_disk>,
333 334 has_ignored_ancestor: bool,
334 335 ) -> Result<(), DirstateV2ParseError> {
335 336 self.check_for_outdated_directory_cache(&dirstate_node)?;
336 337 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
337 338 let file_type = fs_metadata.file_type();
338 339 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
339 340 if !file_or_symlink {
340 341 // If we previously had a file here, it was removed (with
341 342 // `hg rm` or similar) or deleted before it could be
342 343 // replaced by a directory or something else.
343 344 self.mark_removed_or_deleted_if_file(
344 345 &hg_path,
345 346 dirstate_node.state()?,
346 347 );
347 348 }
348 349 if file_type.is_dir() {
349 350 if self.options.collect_traversed_dirs {
350 351 self.outcome
351 352 .lock()
352 353 .unwrap()
353 354 .traversed
354 355 .push(hg_path.detach_from_tree())
355 356 }
356 357 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
357 358 let is_at_repo_root = false;
358 359 let children_all_have_dirstate_node_or_are_ignored = self
359 360 .traverse_fs_directory_and_dirstate(
360 361 is_ignored,
361 362 dirstate_node.children(self.dmap.on_disk)?,
362 363 hg_path,
363 364 fs_path,
364 365 Some(fs_metadata),
365 366 dirstate_node.cached_directory_mtime(),
366 367 is_at_repo_root,
367 368 )?;
368 369 self.maybe_save_directory_mtime(
369 370 children_all_have_dirstate_node_or_are_ignored,
370 371 fs_metadata,
371 372 dirstate_node,
372 373 )?
373 374 } else {
374 375 if file_or_symlink && self.matcher.matches(hg_path) {
375 376 if let Some(state) = dirstate_node.state()? {
376 377 match state {
377 378 EntryState::Added => self
378 379 .outcome
379 380 .lock()
380 381 .unwrap()
381 382 .added
382 383 .push(hg_path.detach_from_tree()),
383 384 EntryState::Removed => self
384 385 .outcome
385 386 .lock()
386 387 .unwrap()
387 388 .removed
388 389 .push(hg_path.detach_from_tree()),
389 390 EntryState::Merged => self
390 391 .outcome
391 392 .lock()
392 393 .unwrap()
393 394 .modified
394 395 .push(hg_path.detach_from_tree()),
395 396 EntryState::Normal => self
396 397 .handle_normal_file(&dirstate_node, fs_metadata)?,
397 398 }
398 399 } else {
399 400 // `node.entry.is_none()` indicates a "directory"
400 401 // node, but the filesystem has a file
401 402 self.mark_unknown_or_ignored(
402 403 has_ignored_ancestor,
403 404 hg_path,
404 405 );
405 406 }
406 407 }
407 408
408 409 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
409 410 {
410 411 self.traverse_dirstate_only(child_node)?
411 412 }
412 413 }
413 414 Ok(())
414 415 }
415 416
416 417 fn maybe_save_directory_mtime(
417 418 &self,
418 419 children_all_have_dirstate_node_or_are_ignored: bool,
419 420 directory_metadata: &std::fs::Metadata,
420 421 dirstate_node: NodeRef<'tree, 'on_disk>,
421 422 ) -> Result<(), DirstateV2ParseError> {
422 423 if children_all_have_dirstate_node_or_are_ignored {
423 424 // All filesystem directory entries from `read_dir` have a
424 425 // corresponding node in the dirstate, so we can reconstitute the
425 426 // names of those entries without calling `read_dir` again.
426 427 if let (Some(status_start), Ok(directory_mtime)) = (
427 428 &self.filesystem_time_at_status_start,
428 429 directory_metadata.modified(),
429 430 ) {
430 431 // Although the Rust standard library’s `SystemTime` type
431 432 // has nanosecond precision, the times reported for a
432 433 // directory’s (or file’s) modified time may have lower
433 434 // resolution based on the filesystem (for example ext3
434 435 // only stores integer seconds), kernel (see
435 436 // https://stackoverflow.com/a/14393315/1162888), etc.
436 437 if &directory_mtime >= status_start {
437 438 // The directory was modified too recently, don’t cache its
438 439 // `read_dir` results.
439 440 //
440 441 // A timeline like this is possible:
441 442 //
442 443 // 1. A change to this directory (direct child was
443 444 // added or removed) cause its mtime to be set
444 445 // (possibly truncated) to `directory_mtime`
445 446 // 2. This `status` algorithm calls `read_dir`
446 447 // 3. An other change is made to the same directory is
447 448 // made so that calling `read_dir` agin would give
448 449 // different results, but soon enough after 1. that
449 450 // the mtime stays the same
450 451 //
451 452 // On a system where the time resolution poor, this
452 453 // scenario is not unlikely if all three steps are caused
453 454 // by the same script.
454 455 } else {
455 456 // We’ve observed (through `status_start`) that time has
456 457 // “progressed” since `directory_mtime`, so any further
457 458 // change to this directory is extremely likely to cause a
458 459 // different mtime.
459 460 //
460 461 // Having the same mtime again is not entirely impossible
461 462 // since the system clock is not monotonous. It could jump
462 463 // backward to some point before `directory_mtime`, then a
463 464 // directory change could potentially happen during exactly
464 465 // the wrong tick.
465 466 //
466 467 // We deem this scenario (unlike the previous one) to be
467 468 // unlikely enough in practice.
468 469 let timestamp = directory_mtime.into();
469 470 let cached = dirstate_node.cached_directory_mtime();
470 471 if cached != Some(&timestamp) {
471 472 let hg_path = dirstate_node
472 473 .full_path_borrowed(self.dmap.on_disk)?
473 474 .detach_from_tree();
474 475 self.new_cachable_directories
475 476 .lock()
476 477 .unwrap()
477 478 .push((hg_path, timestamp))
478 479 }
479 480 }
480 481 }
481 482 }
482 483 Ok(())
483 484 }
484 485
485 486 /// A file with `EntryState::Normal` in the dirstate was found in the
486 487 /// filesystem
487 488 fn handle_normal_file(
488 489 &self,
489 490 dirstate_node: &NodeRef<'tree, 'on_disk>,
490 491 fs_metadata: &std::fs::Metadata,
491 492 ) -> Result<(), DirstateV2ParseError> {
492 493 // Keep the low 31 bits
493 494 fn truncate_u64(value: u64) -> i32 {
494 495 (value & 0x7FFF_FFFF) as i32
495 496 }
496 497 fn truncate_i64(value: i64) -> i32 {
497 498 (value & 0x7FFF_FFFF) as i32
498 499 }
499 500
500 501 let entry = dirstate_node
501 502 .entry()?
502 503 .expect("handle_normal_file called with entry-less node");
503 504 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
504 505 let mode_changed =
505 506 || self.options.check_exec && entry.mode_changed(fs_metadata);
506 507 let size = entry.size();
507 508 let size_changed = size != truncate_u64(fs_metadata.len());
508 509 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
509 510 // issue6456: Size returned may be longer due to encryption
510 511 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
511 512 self.outcome
512 513 .lock()
513 514 .unwrap()
514 515 .unsure
515 516 .push(hg_path.detach_from_tree())
516 517 } else if dirstate_node.has_copy_source()
517 518 || entry.is_from_other_parent()
518 519 || (size >= 0 && (size_changed || mode_changed()))
519 520 {
520 521 self.outcome
521 522 .lock()
522 523 .unwrap()
523 524 .modified
524 525 .push(hg_path.detach_from_tree())
525 526 } else {
526 527 let mtime = mtime_seconds(fs_metadata);
527 528 if truncate_i64(mtime) != entry.mtime()
528 529 || mtime == self.options.last_normal_time
529 530 {
530 531 self.outcome
531 532 .lock()
532 533 .unwrap()
533 534 .unsure
534 535 .push(hg_path.detach_from_tree())
535 536 } else if self.options.list_clean {
536 537 self.outcome
537 538 .lock()
538 539 .unwrap()
539 540 .clean
540 541 .push(hg_path.detach_from_tree())
541 542 }
542 543 }
543 544 Ok(())
544 545 }
545 546
546 547 /// A node in the dirstate tree has no corresponding filesystem entry
547 548 fn traverse_dirstate_only(
548 549 &self,
549 550 dirstate_node: NodeRef<'tree, 'on_disk>,
550 551 ) -> Result<(), DirstateV2ParseError> {
551 552 self.check_for_outdated_directory_cache(&dirstate_node)?;
552 553 self.mark_removed_or_deleted_if_file(
553 554 &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
554 555 dirstate_node.state()?,
555 556 );
556 557 dirstate_node
557 558 .children(self.dmap.on_disk)?
558 559 .par_iter()
559 560 .map(|child_node| self.traverse_dirstate_only(child_node))
560 561 .collect()
561 562 }
562 563
563 564 /// A node in the dirstate tree has no corresponding *file* on the
564 565 /// filesystem
565 566 ///
566 567 /// Does nothing on a "directory" node
567 568 fn mark_removed_or_deleted_if_file(
568 569 &self,
569 570 hg_path: &BorrowedPath<'tree, 'on_disk>,
570 571 dirstate_node_state: Option<EntryState>,
571 572 ) {
572 573 if let Some(state) = dirstate_node_state {
573 574 if self.matcher.matches(hg_path) {
574 575 if let EntryState::Removed = state {
575 576 self.outcome
576 577 .lock()
577 578 .unwrap()
578 579 .removed
579 580 .push(hg_path.detach_from_tree())
580 581 } else {
581 582 self.outcome
582 583 .lock()
583 584 .unwrap()
584 585 .deleted
585 586 .push(hg_path.detach_from_tree())
586 587 }
587 588 }
588 589 }
589 590 }
590 591
591 592 /// Something in the filesystem has no corresponding dirstate node
592 593 ///
593 594 /// Returns whether that path is ignored
594 595 fn traverse_fs_only(
595 596 &self,
596 597 has_ignored_ancestor: bool,
597 598 directory_hg_path: &HgPath,
598 599 fs_entry: &DirEntry,
599 600 ) -> bool {
600 601 let hg_path = directory_hg_path.join(&fs_entry.base_name);
601 602 let file_type = fs_entry.metadata.file_type();
602 603 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
603 604 if file_type.is_dir() {
604 605 let is_ignored =
605 606 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
606 607 let traverse_children = if is_ignored {
607 608 // Descendants of an ignored directory are all ignored
608 609 self.options.list_ignored
609 610 } else {
610 611 // Descendants of an unknown directory may be either unknown or
611 612 // ignored
612 613 self.options.list_unknown || self.options.list_ignored
613 614 };
614 615 if traverse_children {
615 616 let is_at_repo_root = false;
616 617 if let Ok(children_fs_entries) = self.read_dir(
617 618 &hg_path,
618 619 &fs_entry.full_path,
619 620 is_at_repo_root,
620 621 ) {
621 622 children_fs_entries.par_iter().for_each(|child_fs_entry| {
622 623 self.traverse_fs_only(
623 624 is_ignored,
624 625 &hg_path,
625 626 child_fs_entry,
626 627 );
627 628 })
628 629 }
629 630 }
630 631 if self.options.collect_traversed_dirs {
631 632 self.outcome.lock().unwrap().traversed.push(hg_path.into())
632 633 }
633 634 is_ignored
634 635 } else {
635 636 if file_or_symlink {
636 637 if self.matcher.matches(&hg_path) {
637 638 self.mark_unknown_or_ignored(
638 639 has_ignored_ancestor,
639 640 &BorrowedPath::InMemory(&hg_path),
640 641 )
641 642 } else {
642 643 // We haven’t computed whether this path is ignored. It
643 644 // might not be, and a future run of status might have a
644 645 // different matcher that matches it. So treat it as not
645 646 // ignored. That is, inhibit readdir caching of the parent
646 647 // directory.
647 648 false
648 649 }
649 650 } else {
650 651 // This is neither a directory, a plain file, or a symlink.
651 652 // Treat it like an ignored file.
652 653 true
653 654 }
654 655 }
655 656 }
656 657
657 658 /// Returns whether that path is ignored
658 659 fn mark_unknown_or_ignored(
659 660 &self,
660 661 has_ignored_ancestor: bool,
661 662 hg_path: &BorrowedPath<'_, 'on_disk>,
662 663 ) -> bool {
663 664 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
664 665 if is_ignored {
665 666 if self.options.list_ignored {
666 667 self.outcome
667 668 .lock()
668 669 .unwrap()
669 670 .ignored
670 671 .push(hg_path.detach_from_tree())
671 672 }
672 673 } else {
673 674 if self.options.list_unknown {
674 675 self.outcome
675 676 .lock()
676 677 .unwrap()
677 678 .unknown
678 679 .push(hg_path.detach_from_tree())
679 680 }
680 681 }
681 682 is_ignored
682 683 }
683 684 }
684 685
#[cfg(unix)] // TODO
/// Integer-second mtime of `metadata`, read directly from the Unix stat
/// data.
///
/// `Metadata::modified()` would be portable, but it would build a
/// `SystemTime` with sub-second precision just for us to throw that
/// precision away here.
fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
    use std::os::unix::fs::MetadataExt;
    metadata.mtime()
}
693 694
694 695 struct DirEntry {
695 696 base_name: HgPathBuf,
696 697 full_path: PathBuf,
697 698 metadata: std::fs::Metadata,
698 699 }
699 700
700 701 impl DirEntry {
701 702 /// Returns **unsorted** entries in the given directory, with name and
702 703 /// metadata.
703 704 ///
704 705 /// If a `.hg` sub-directory is encountered:
705 706 ///
706 707 /// * At the repository root, ignore that sub-directory
707 708 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
708 709 /// list instead.
709 710 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
710 711 let mut results = Vec::new();
711 712 for entry in path.read_dir()? {
712 713 let entry = entry?;
713 714 let metadata = entry.metadata()?;
714 715 let name = get_bytes_from_os_string(entry.file_name());
715 716 // FIXME don't do this when cached
716 717 if name == b".hg" {
717 718 if is_at_repo_root {
718 719 // Skip the repo’s own .hg (might be a symlink)
719 720 continue;
720 721 } else if metadata.is_dir() {
721 722 // A .hg sub-directory at another location means a subrepo,
722 723 // skip it entirely.
723 724 return Ok(Vec::new());
724 725 }
725 726 }
726 727 results.push(DirEntry {
727 728 base_name: name.into(),
728 729 full_path: entry.path(),
729 730 metadata,
730 731 })
731 732 }
732 733 Ok(results)
733 734 }
734 735 }
735 736
736 737 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
737 738 /// of the give repository.
738 739 ///
739 740 /// This is similar to `SystemTime::now()`, with the result truncated to the
740 741 /// same time resolution as other files’ modification times. Using `.hg`
741 742 /// instead of the system’s default temporary directory (such as `/tmp`) makes
742 743 /// it more likely the temporary file is in the same disk partition as contents
743 744 /// of the working directory, which can matter since different filesystems may
744 745 /// store timestamps with different resolutions.
745 746 ///
746 747 /// This may fail, typically if we lack write permissions. In that case we
747 748 /// should continue the `status()` algoritm anyway and consider the current
748 749 /// date/time to be unknown.
749 750 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
750 751 tempfile::tempfile_in(repo_root.join(".hg"))?
751 752 .metadata()?
752 753 .modified()
753 754 }
General Comments 0
You need to be logged in to leave comments. Login now