##// END OF EJS Templates
rust: add message to `DirstateV2ParseError` to give some context...
Raphaël Gomès -
r50268:f8ec7b16 stable
parent child Browse files
Show More
@@ -1,723 +1,723 b''
1 1 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
2 2 use crate::errors::HgError;
3 3 use bitflags::bitflags;
4 4 use std::convert::{TryFrom, TryInto};
5 5 use std::fs;
6 6 use std::io;
7 7 use std::time::{SystemTime, UNIX_EPOCH};
8 8
/// State of a file in the dirstate-v1 sense: normal, added, removed or
/// merged.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntryState {
    Normal,
    Added,
    Removed,
    Merged,
}
16 16
17 17 /// `size` and `mtime.seconds` are truncated to 31 bits.
18 18 ///
19 19 /// TODO: double-check status algorithm correctness for files
20 20 /// larger than 2 GiB or modified after 2038.
21 21 #[derive(Debug, Copy, Clone)]
22 22 pub struct DirstateEntry {
23 23 pub(crate) flags: Flags,
24 24 mode_size: Option<(u32, u32)>,
25 25 mtime: Option<TruncatedTimestamp>,
26 26 }
27 27
28 28 bitflags! {
29 29 pub(crate) struct Flags: u8 {
30 30 const WDIR_TRACKED = 1 << 0;
31 31 const P1_TRACKED = 1 << 1;
32 32 const P2_INFO = 1 << 2;
33 33 const HAS_FALLBACK_EXEC = 1 << 3;
34 34 const FALLBACK_EXEC = 1 << 4;
35 35 const HAS_FALLBACK_SYMLINK = 1 << 5;
36 36 const FALLBACK_SYMLINK = 1 << 6;
37 37 }
38 38 }
39 39
/// A Unix timestamp with nanosecond precision, whose seconds component is
/// truncated to 31 bits.
#[derive(Copy, Clone, Debug)]
pub struct TruncatedTimestamp {
    /// Lower 31 bits of the number of seconds.
    truncated_seconds: u32,
    /// Always in the `0 .. 1_000_000_000` range.
    nanoseconds: u32,
    /// TODO this should be in DirstateEntry, but the current code needs
    /// refactoring to use DirstateEntry instead of TruncatedTimestamp for
    /// comparison.
    pub second_ambiguous: bool,
}
51 51
52 52 impl TruncatedTimestamp {
53 53 /// Constructs from a timestamp potentially outside of the supported range,
54 54 /// and truncate the seconds components to its lower 31 bits.
55 55 ///
56 56 /// Panics if the nanoseconds components is not in the expected range.
57 57 pub fn new_truncate(
58 58 seconds: i64,
59 59 nanoseconds: u32,
60 60 second_ambiguous: bool,
61 61 ) -> Self {
62 62 assert!(nanoseconds < NSEC_PER_SEC);
63 63 Self {
64 64 truncated_seconds: seconds as u32 & RANGE_MASK_31BIT,
65 65 nanoseconds,
66 66 second_ambiguous,
67 67 }
68 68 }
69 69
70 70 /// Construct from components. Returns an error if they are not in the
71 71 /// expcted range.
72 72 pub fn from_already_truncated(
73 73 truncated_seconds: u32,
74 74 nanoseconds: u32,
75 75 second_ambiguous: bool,
76 76 ) -> Result<Self, DirstateV2ParseError> {
77 77 if truncated_seconds & !RANGE_MASK_31BIT == 0
78 78 && nanoseconds < NSEC_PER_SEC
79 79 {
80 80 Ok(Self {
81 81 truncated_seconds,
82 82 nanoseconds,
83 83 second_ambiguous,
84 84 })
85 85 } else {
86 Err(DirstateV2ParseError)
86 Err(DirstateV2ParseError::new("when reading datetime"))
87 87 }
88 88 }
89 89
90 90 /// Returns a `TruncatedTimestamp` for the modification time of `metadata`.
91 91 ///
92 92 /// Propagates errors from `std` on platforms where modification time
93 93 /// is not available at all.
94 94 pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> {
95 95 #[cfg(unix)]
96 96 {
97 97 use std::os::unix::fs::MetadataExt;
98 98 let seconds = metadata.mtime();
99 99 // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC` range
100 100 let nanoseconds = metadata.mtime_nsec().try_into().unwrap();
101 101 Ok(Self::new_truncate(seconds, nanoseconds, false))
102 102 }
103 103 #[cfg(not(unix))]
104 104 {
105 105 metadata.modified().map(Self::from)
106 106 }
107 107 }
108 108
109 109 /// Like `for_mtime_of`, but may return `None` or a value with
110 110 /// `second_ambiguous` set if the mtime is not "reliable".
111 111 ///
112 112 /// A modification time is reliable if it is older than `boundary` (or
113 113 /// sufficiently in the future).
114 114 ///
115 115 /// Otherwise a concurrent modification might happens with the same mtime.
116 116 pub fn for_reliable_mtime_of(
117 117 metadata: &fs::Metadata,
118 118 boundary: &Self,
119 119 ) -> io::Result<Option<Self>> {
120 120 let mut mtime = Self::for_mtime_of(metadata)?;
121 121 // If the mtime of the ambiguous file is younger (or equal) to the
122 122 // starting point of the `status` walk, we cannot garantee that
123 123 // another, racy, write will not happen right after with the same mtime
124 124 // and we cannot cache the information.
125 125 //
126 126 // However if the mtime is far away in the future, this is likely some
127 127 // mismatch between the current clock and previous file system
128 128 // operation. So mtime more than one days in the future are considered
129 129 // fine.
130 130 let reliable = if mtime.truncated_seconds == boundary.truncated_seconds
131 131 {
132 132 mtime.second_ambiguous = true;
133 133 mtime.nanoseconds != 0
134 134 && boundary.nanoseconds != 0
135 135 && mtime.nanoseconds < boundary.nanoseconds
136 136 } else {
137 137 // `truncated_seconds` is less than 2**31,
138 138 // so this does not overflow `u32`:
139 139 let one_day_later = boundary.truncated_seconds + 24 * 3600;
140 140 mtime.truncated_seconds < boundary.truncated_seconds
141 141 || mtime.truncated_seconds > one_day_later
142 142 };
143 143 if reliable {
144 144 Ok(Some(mtime))
145 145 } else {
146 146 Ok(None)
147 147 }
148 148 }
149 149
150 150 /// The lower 31 bits of the number of seconds since the epoch.
151 151 pub fn truncated_seconds(&self) -> u32 {
152 152 self.truncated_seconds
153 153 }
154 154
155 155 /// The sub-second component of this timestamp, in nanoseconds.
156 156 /// Always in the `0 .. 1_000_000_000` range.
157 157 ///
158 158 /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
159 159 pub fn nanoseconds(&self) -> u32 {
160 160 self.nanoseconds
161 161 }
162 162
163 163 /// Returns whether two timestamps are equal modulo 2**31 seconds.
164 164 ///
165 165 /// If this returns `true`, the original values converted from `SystemTime`
166 166 /// or given to `new_truncate` were very likely equal. A false positive is
167 167 /// possible if they were exactly a multiple of 2**31 seconds apart (around
168 168 /// 68 years). This is deemed very unlikely to happen by chance, especially
169 169 /// on filesystems that support sub-second precision.
170 170 ///
171 171 /// If someone is manipulating the modification times of some files to
172 172 /// intentionally make `hg status` return incorrect results, not truncating
173 173 /// wouldn’t help much since they can set exactly the expected timestamp.
174 174 ///
175 175 /// Sub-second precision is ignored if it is zero in either value.
176 176 /// Some APIs simply return zero when more precision is not available.
177 177 /// When comparing values from different sources, if only one is truncated
178 178 /// in that way, doing a simple comparison would cause many false
179 179 /// negatives.
180 180 pub fn likely_equal(self, other: Self) -> bool {
181 181 if self.truncated_seconds != other.truncated_seconds {
182 182 false
183 183 } else if self.nanoseconds == 0 || other.nanoseconds == 0 {
184 184 if self.second_ambiguous {
185 185 false
186 186 } else {
187 187 true
188 188 }
189 189 } else {
190 190 self.nanoseconds == other.nanoseconds
191 191 }
192 192 }
193 193
194 194 pub fn likely_equal_to_mtime_of(
195 195 self,
196 196 metadata: &fs::Metadata,
197 197 ) -> io::Result<bool> {
198 198 Ok(self.likely_equal(Self::for_mtime_of(metadata)?))
199 199 }
200 200 }
201 201
202 202 impl From<SystemTime> for TruncatedTimestamp {
203 203 fn from(system_time: SystemTime) -> Self {
204 204 // On Unix, `SystemTime` is a wrapper for the `timespec` C struct:
205 205 // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec
206 206 // We want to effectively access its fields, but the Rust standard
207 207 // library does not expose them. The best we can do is:
208 208 let seconds;
209 209 let nanoseconds;
210 210 match system_time.duration_since(UNIX_EPOCH) {
211 211 Ok(duration) => {
212 212 seconds = duration.as_secs() as i64;
213 213 nanoseconds = duration.subsec_nanos();
214 214 }
215 215 Err(error) => {
216 216 // `system_time` is before `UNIX_EPOCH`.
217 217 // We need to undo this algorithm:
218 218 // https://github.com/rust-lang/rust/blob/6bed1f0bc3cc50c10aab26d5f94b16a00776b8a5/library/std/src/sys/unix/time.rs#L40-L41
219 219 let negative = error.duration();
220 220 let negative_secs = negative.as_secs() as i64;
221 221 let negative_nanos = negative.subsec_nanos();
222 222 if negative_nanos == 0 {
223 223 seconds = -negative_secs;
224 224 nanoseconds = 0;
225 225 } else {
226 226 // For example if `system_time` was 4.3 seconds before
227 227 // the Unix epoch we get a Duration that represents
228 228 // `(-4, -0.3)` but we want `(-5, +0.7)`:
229 229 seconds = -1 - negative_secs;
230 230 nanoseconds = NSEC_PER_SEC - negative_nanos;
231 231 }
232 232 }
233 233 };
234 234 Self::new_truncate(seconds, nanoseconds, false)
235 235 }
236 236 }
237 237
/// Number of nanoseconds in one second.
const NSEC_PER_SEC: u32 = 1_000_000_000;
/// Mask that keeps the lower 31 bits of a `u32`.
pub const RANGE_MASK_31BIT: u32 = (1 << 31) - 1;

/// Marker value used in place of an mtime in the dirstate-v1
/// representation when no mtime is stored.
pub const MTIME_UNSET: i32 = -1;

/// A `DirstateEntry` with a size of `-2` means that it was merged from the
/// other parent. This allows revert to pick the right status back during a
/// merge.
pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
/// A special value used for the internal representation of a special case
/// in the dirstate-v1 format.
pub const SIZE_NON_NORMAL: i32 = -1;
250 250
251 251 #[derive(Debug, Default, Copy, Clone)]
252 252 pub struct DirstateV2Data {
253 253 pub wc_tracked: bool,
254 254 pub p1_tracked: bool,
255 255 pub p2_info: bool,
256 256 pub mode_size: Option<(u32, u32)>,
257 257 pub mtime: Option<TruncatedTimestamp>,
258 258 pub fallback_exec: Option<bool>,
259 259 pub fallback_symlink: Option<bool>,
260 260 }
261 261
262 262 #[derive(Debug, Default, Copy, Clone)]
263 263 pub struct ParentFileData {
264 264 pub mode_size: Option<(u32, u32)>,
265 265 pub mtime: Option<TruncatedTimestamp>,
266 266 }
267 267
268 268 impl DirstateEntry {
269 269 pub fn from_v2_data(v2_data: DirstateV2Data) -> Self {
270 270 let DirstateV2Data {
271 271 wc_tracked,
272 272 p1_tracked,
273 273 p2_info,
274 274 mode_size,
275 275 mtime,
276 276 fallback_exec,
277 277 fallback_symlink,
278 278 } = v2_data;
279 279 if let Some((mode, size)) = mode_size {
280 280 // TODO: return an error for out of range values?
281 281 assert!(mode & !RANGE_MASK_31BIT == 0);
282 282 assert!(size & !RANGE_MASK_31BIT == 0);
283 283 }
284 284 let mut flags = Flags::empty();
285 285 flags.set(Flags::WDIR_TRACKED, wc_tracked);
286 286 flags.set(Flags::P1_TRACKED, p1_tracked);
287 287 flags.set(Flags::P2_INFO, p2_info);
288 288 if let Some(exec) = fallback_exec {
289 289 flags.insert(Flags::HAS_FALLBACK_EXEC);
290 290 if exec {
291 291 flags.insert(Flags::FALLBACK_EXEC);
292 292 }
293 293 }
294 294 if let Some(exec) = fallback_symlink {
295 295 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
296 296 if exec {
297 297 flags.insert(Flags::FALLBACK_SYMLINK);
298 298 }
299 299 }
300 300 Self {
301 301 flags,
302 302 mode_size,
303 303 mtime,
304 304 }
305 305 }
306 306
307 307 pub fn from_v1_data(
308 308 state: EntryState,
309 309 mode: i32,
310 310 size: i32,
311 311 mtime: i32,
312 312 ) -> Self {
313 313 match state {
314 314 EntryState::Normal => {
315 315 if size == SIZE_FROM_OTHER_PARENT {
316 316 Self {
317 317 // might be missing P1_TRACKED
318 318 flags: Flags::WDIR_TRACKED | Flags::P2_INFO,
319 319 mode_size: None,
320 320 mtime: None,
321 321 }
322 322 } else if size == SIZE_NON_NORMAL {
323 323 Self {
324 324 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
325 325 mode_size: None,
326 326 mtime: None,
327 327 }
328 328 } else if mtime == MTIME_UNSET {
329 329 // TODO: return an error for negative values?
330 330 let mode = u32::try_from(mode).unwrap();
331 331 let size = u32::try_from(size).unwrap();
332 332 Self {
333 333 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
334 334 mode_size: Some((mode, size)),
335 335 mtime: None,
336 336 }
337 337 } else {
338 338 // TODO: return an error for negative values?
339 339 let mode = u32::try_from(mode).unwrap();
340 340 let size = u32::try_from(size).unwrap();
341 341 let mtime = u32::try_from(mtime).unwrap();
342 342 let mtime = TruncatedTimestamp::from_already_truncated(
343 343 mtime, 0, false,
344 344 )
345 345 .unwrap();
346 346 Self {
347 347 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
348 348 mode_size: Some((mode, size)),
349 349 mtime: Some(mtime),
350 350 }
351 351 }
352 352 }
353 353 EntryState::Added => Self {
354 354 flags: Flags::WDIR_TRACKED,
355 355 mode_size: None,
356 356 mtime: None,
357 357 },
358 358 EntryState::Removed => Self {
359 359 flags: if size == SIZE_NON_NORMAL {
360 360 Flags::P1_TRACKED | Flags::P2_INFO
361 361 } else if size == SIZE_FROM_OTHER_PARENT {
362 362 // We don’t know if P1_TRACKED should be set (file history)
363 363 Flags::P2_INFO
364 364 } else {
365 365 Flags::P1_TRACKED
366 366 },
367 367 mode_size: None,
368 368 mtime: None,
369 369 },
370 370 EntryState::Merged => Self {
371 371 flags: Flags::WDIR_TRACKED
372 372 | Flags::P1_TRACKED // might not be true because of rename ?
373 373 | Flags::P2_INFO, // might not be true because of rename ?
374 374 mode_size: None,
375 375 mtime: None,
376 376 },
377 377 }
378 378 }
379 379
380 380 /// Creates a new entry in "removed" state.
381 381 ///
382 382 /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or
383 383 /// `SIZE_FROM_OTHER_PARENT`
384 384 pub fn new_removed(size: i32) -> Self {
385 385 Self::from_v1_data(EntryState::Removed, 0, size, 0)
386 386 }
387 387
388 388 pub fn new_tracked() -> Self {
389 389 let data = DirstateV2Data {
390 390 wc_tracked: true,
391 391 ..Default::default()
392 392 };
393 393 Self::from_v2_data(data)
394 394 }
395 395
396 396 pub fn tracked(&self) -> bool {
397 397 self.flags.contains(Flags::WDIR_TRACKED)
398 398 }
399 399
400 400 pub fn p1_tracked(&self) -> bool {
401 401 self.flags.contains(Flags::P1_TRACKED)
402 402 }
403 403
404 404 fn in_either_parent(&self) -> bool {
405 405 self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO)
406 406 }
407 407
408 408 pub fn removed(&self) -> bool {
409 409 self.in_either_parent() && !self.flags.contains(Flags::WDIR_TRACKED)
410 410 }
411 411
412 412 pub fn p2_info(&self) -> bool {
413 413 self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
414 414 }
415 415
416 416 pub fn added(&self) -> bool {
417 417 self.flags.contains(Flags::WDIR_TRACKED) && !self.in_either_parent()
418 418 }
419 419
420 420 pub fn modified(&self) -> bool {
421 421 self.flags
422 422 .contains(Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO)
423 423 }
424 424
425 425 pub fn maybe_clean(&self) -> bool {
426 426 if !self.flags.contains(Flags::WDIR_TRACKED) {
427 427 false
428 428 } else if !self.flags.contains(Flags::P1_TRACKED) {
429 429 false
430 430 } else if self.flags.contains(Flags::P2_INFO) {
431 431 false
432 432 } else {
433 433 true
434 434 }
435 435 }
436 436
437 437 pub fn any_tracked(&self) -> bool {
438 438 self.flags.intersects(
439 439 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
440 440 )
441 441 }
442 442
443 443 pub(crate) fn v2_data(&self) -> DirstateV2Data {
444 444 if !self.any_tracked() {
445 445 // TODO: return an Option instead?
446 446 panic!("Accessing v2_data of an untracked DirstateEntry")
447 447 }
448 448 let wc_tracked = self.flags.contains(Flags::WDIR_TRACKED);
449 449 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
450 450 let p2_info = self.flags.contains(Flags::P2_INFO);
451 451 let mode_size = self.mode_size;
452 452 let mtime = self.mtime;
453 453 DirstateV2Data {
454 454 wc_tracked,
455 455 p1_tracked,
456 456 p2_info,
457 457 mode_size,
458 458 mtime,
459 459 fallback_exec: self.get_fallback_exec(),
460 460 fallback_symlink: self.get_fallback_symlink(),
461 461 }
462 462 }
463 463
464 464 fn v1_state(&self) -> EntryState {
465 465 if !self.any_tracked() {
466 466 // TODO: return an Option instead?
467 467 panic!("Accessing v1_state of an untracked DirstateEntry")
468 468 }
469 469 if self.removed() {
470 470 EntryState::Removed
471 471 } else if self.modified() {
472 472 EntryState::Merged
473 473 } else if self.added() {
474 474 EntryState::Added
475 475 } else {
476 476 EntryState::Normal
477 477 }
478 478 }
479 479
480 480 fn v1_mode(&self) -> i32 {
481 481 if let Some((mode, _size)) = self.mode_size {
482 482 i32::try_from(mode).unwrap()
483 483 } else {
484 484 0
485 485 }
486 486 }
487 487
488 488 fn v1_size(&self) -> i32 {
489 489 if !self.any_tracked() {
490 490 // TODO: return an Option instead?
491 491 panic!("Accessing v1_size of an untracked DirstateEntry")
492 492 }
493 493 if self.removed()
494 494 && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO)
495 495 {
496 496 SIZE_NON_NORMAL
497 497 } else if self.flags.contains(Flags::P2_INFO) {
498 498 SIZE_FROM_OTHER_PARENT
499 499 } else if self.removed() {
500 500 0
501 501 } else if self.added() {
502 502 SIZE_NON_NORMAL
503 503 } else if let Some((_mode, size)) = self.mode_size {
504 504 i32::try_from(size).unwrap()
505 505 } else {
506 506 SIZE_NON_NORMAL
507 507 }
508 508 }
509 509
510 510 fn v1_mtime(&self) -> i32 {
511 511 if !self.any_tracked() {
512 512 // TODO: return an Option instead?
513 513 panic!("Accessing v1_mtime of an untracked DirstateEntry")
514 514 }
515 515 if self.removed() {
516 516 0
517 517 } else if self.flags.contains(Flags::P2_INFO) {
518 518 MTIME_UNSET
519 519 } else if !self.flags.contains(Flags::P1_TRACKED) {
520 520 MTIME_UNSET
521 521 } else if let Some(mtime) = self.mtime {
522 522 if mtime.second_ambiguous {
523 523 MTIME_UNSET
524 524 } else {
525 525 i32::try_from(mtime.truncated_seconds()).unwrap()
526 526 }
527 527 } else {
528 528 MTIME_UNSET
529 529 }
530 530 }
531 531
532 532 // TODO: return `Option<EntryState>`? None when `!self.any_tracked`
533 533 pub fn state(&self) -> EntryState {
534 534 self.v1_state()
535 535 }
536 536
537 537 // TODO: return Option?
538 538 pub fn mode(&self) -> i32 {
539 539 self.v1_mode()
540 540 }
541 541
542 542 // TODO: return Option?
543 543 pub fn size(&self) -> i32 {
544 544 self.v1_size()
545 545 }
546 546
547 547 // TODO: return Option?
548 548 pub fn mtime(&self) -> i32 {
549 549 self.v1_mtime()
550 550 }
551 551
552 552 pub fn get_fallback_exec(&self) -> Option<bool> {
553 553 if self.flags.contains(Flags::HAS_FALLBACK_EXEC) {
554 554 Some(self.flags.contains(Flags::FALLBACK_EXEC))
555 555 } else {
556 556 None
557 557 }
558 558 }
559 559
560 560 pub fn set_fallback_exec(&mut self, value: Option<bool>) {
561 561 match value {
562 562 None => {
563 563 self.flags.remove(Flags::HAS_FALLBACK_EXEC);
564 564 self.flags.remove(Flags::FALLBACK_EXEC);
565 565 }
566 566 Some(exec) => {
567 567 self.flags.insert(Flags::HAS_FALLBACK_EXEC);
568 568 if exec {
569 569 self.flags.insert(Flags::FALLBACK_EXEC);
570 570 }
571 571 }
572 572 }
573 573 }
574 574
575 575 pub fn get_fallback_symlink(&self) -> Option<bool> {
576 576 if self.flags.contains(Flags::HAS_FALLBACK_SYMLINK) {
577 577 Some(self.flags.contains(Flags::FALLBACK_SYMLINK))
578 578 } else {
579 579 None
580 580 }
581 581 }
582 582
583 583 pub fn set_fallback_symlink(&mut self, value: Option<bool>) {
584 584 match value {
585 585 None => {
586 586 self.flags.remove(Flags::HAS_FALLBACK_SYMLINK);
587 587 self.flags.remove(Flags::FALLBACK_SYMLINK);
588 588 }
589 589 Some(symlink) => {
590 590 self.flags.insert(Flags::HAS_FALLBACK_SYMLINK);
591 591 if symlink {
592 592 self.flags.insert(Flags::FALLBACK_SYMLINK);
593 593 }
594 594 }
595 595 }
596 596 }
597 597
598 598 pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> {
599 599 self.mtime
600 600 }
601 601
602 602 pub fn drop_merge_data(&mut self) {
603 603 if self.flags.contains(Flags::P2_INFO) {
604 604 self.flags.remove(Flags::P2_INFO);
605 605 self.mode_size = None;
606 606 self.mtime = None;
607 607 }
608 608 }
609 609
610 610 pub fn set_possibly_dirty(&mut self) {
611 611 self.mtime = None
612 612 }
613 613
614 614 pub fn set_clean(
615 615 &mut self,
616 616 mode: u32,
617 617 size: u32,
618 618 mtime: TruncatedTimestamp,
619 619 ) {
620 620 let size = size & RANGE_MASK_31BIT;
621 621 self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
622 622 self.mode_size = Some((mode, size));
623 623 self.mtime = Some(mtime);
624 624 }
625 625
626 626 pub fn set_tracked(&mut self) {
627 627 self.flags.insert(Flags::WDIR_TRACKED);
628 628 // `set_tracked` is replacing various `normallookup` call. So we mark
629 629 // the files as needing lookup
630 630 //
631 631 // Consider dropping this in the future in favor of something less
632 632 // broad.
633 633 self.mtime = None;
634 634 }
635 635
636 636 pub fn set_untracked(&mut self) {
637 637 self.flags.remove(Flags::WDIR_TRACKED);
638 638 self.mode_size = None;
639 639 self.mtime = None;
640 640 }
641 641
642 642 /// Returns `(state, mode, size, mtime)` for the puprose of serialization
643 643 /// in the dirstate-v1 format.
644 644 ///
645 645 /// This includes marker values such as `mtime == -1`. In the future we may
646 646 /// want to not represent these cases that way in memory, but serialization
647 647 /// will need to keep the same format.
648 648 pub fn v1_data(&self) -> (u8, i32, i32, i32) {
649 649 (
650 650 self.v1_state().into(),
651 651 self.v1_mode(),
652 652 self.v1_size(),
653 653 self.v1_mtime(),
654 654 )
655 655 }
656 656
657 657 pub(crate) fn is_from_other_parent(&self) -> bool {
658 658 self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
659 659 }
660 660
661 661 // TODO: other platforms
662 662 #[cfg(unix)]
663 663 pub fn mode_changed(
664 664 &self,
665 665 filesystem_metadata: &std::fs::Metadata,
666 666 ) -> bool {
667 667 let dirstate_exec_bit = (self.mode() as u32 & EXEC_BIT_MASK) != 0;
668 668 let fs_exec_bit = has_exec_bit(filesystem_metadata);
669 669 dirstate_exec_bit != fs_exec_bit
670 670 }
671 671
672 672 /// Returns a `(state, mode, size, mtime)` tuple as for
673 673 /// `DirstateMapMethods::debug_iter`.
674 674 pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
675 675 (self.state().into(), self.mode(), self.size(), self.mtime())
676 676 }
677 677 }
678 678
679 679 impl EntryState {
680 680 pub fn is_tracked(self) -> bool {
681 681 use EntryState::*;
682 682 match self {
683 683 Normal | Added | Merged => true,
684 684 Removed => false,
685 685 }
686 686 }
687 687 }
688 688
689 689 impl TryFrom<u8> for EntryState {
690 690 type Error = HgError;
691 691
692 692 fn try_from(value: u8) -> Result<Self, Self::Error> {
693 693 match value {
694 694 b'n' => Ok(EntryState::Normal),
695 695 b'a' => Ok(EntryState::Added),
696 696 b'r' => Ok(EntryState::Removed),
697 697 b'm' => Ok(EntryState::Merged),
698 698 _ => Err(HgError::CorruptedRepository(format!(
699 699 "Incorrect dirstate entry state {}",
700 700 value
701 701 ))),
702 702 }
703 703 }
704 704 }
705 705
706 706 impl Into<u8> for EntryState {
707 707 fn into(self) -> u8 {
708 708 match self {
709 709 EntryState::Normal => b'n',
710 710 EntryState::Added => b'a',
711 711 EntryState::Removed => b'r',
712 712 EntryState::Merged => b'm',
713 713 }
714 714 }
715 715 }
716 716
/// Bit `0o100` of a file mode, tested by `has_exec_bit`.
const EXEC_BIT_MASK: u32 = 0o100;

/// Returns whether the mode of `metadata` has the `EXEC_BIT_MASK` bit set.
pub fn has_exec_bit(metadata: &std::fs::Metadata) -> bool {
    // TODO: How to handle executable permissions on Windows?
    use std::os::unix::fs::MetadataExt;
    let exec_bits = metadata.mode() & EXEC_BIT_MASK;
    exec_bits != 0
}
@@ -1,1907 +1,1907 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::path::PathBuf;
5 5
6 6 use super::on_disk;
7 7 use super::on_disk::DirstateV2ParseError;
8 8 use super::owning::OwningDirstateMap;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::CopyMapIter;
14 14 use crate::dirstate::DirstateV2Data;
15 15 use crate::dirstate::ParentFileData;
16 16 use crate::dirstate::StateMapIter;
17 17 use crate::dirstate::TruncatedTimestamp;
18 18 use crate::matchers::Matcher;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::DirstateEntry;
21 21 use crate::DirstateError;
22 22 use crate::DirstateMapError;
23 23 use crate::DirstateParents;
24 24 use crate::DirstateStatus;
25 25 use crate::FastHashbrownMap as FastHashMap;
26 26 use crate::PatternFileWarning;
27 27 use crate::StatusError;
28 28 use crate::StatusOptions;
29 29
/// Threshold on the fraction of unreachable (no longer used) data in an
/// existing data file: below it, new data is appended to that file.
const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
33 33
/// Version of the on-disk format
#[derive(Debug, Eq, PartialEq)]
pub enum DirstateVersion {
    V1,
    V2,
}
40 40
41 41 #[derive(Debug)]
42 42 pub struct DirstateMap<'on_disk> {
43 43 /// Contents of the `.hg/dirstate` file
44 44 pub(super) on_disk: &'on_disk [u8],
45 45
46 46 pub(super) root: ChildNodes<'on_disk>,
47 47
48 48 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
49 49 pub(super) nodes_with_entry_count: u32,
50 50
51 51 /// Number of nodes anywhere in the tree that have
52 52 /// `.copy_source.is_some()`.
53 53 pub(super) nodes_with_copy_source_count: u32,
54 54
55 55 /// See on_disk::Header
56 56 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
57 57
58 58 /// How many bytes of `on_disk` are not used anymore
59 59 pub(super) unreachable_bytes: u32,
60 60
61 61 /// Size of the data used to first load this `DirstateMap`. Used in case
62 62 /// we need to write some new metadata, but no new data on disk.
63 63 pub(super) old_data_size: usize,
64 64
65 65 pub(super) dirstate_version: DirstateVersion,
66 66 }
67 67
68 68 /// Using a plain `HgPathBuf` of the full path from the repository root as a
69 69 /// map key would also work: all paths in a given map have the same parent
70 70 /// path, so comparing full paths gives the same result as comparing base
71 71 /// names. However `HashMap` would waste time always re-hashing the same
72 72 /// string prefix.
73 73 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
74 74
75 75 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
76 76 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
77 77 #[derive(Debug)]
78 78 pub(super) enum BorrowedPath<'tree, 'on_disk> {
79 79 InMemory(&'tree HgPathBuf),
80 80 OnDisk(&'on_disk HgPath),
81 81 }
82 82
83 83 #[derive(Debug)]
84 84 pub(super) enum ChildNodes<'on_disk> {
85 85 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
86 86 OnDisk(&'on_disk [on_disk::Node]),
87 87 }
88 88
89 89 #[derive(Debug)]
90 90 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
91 91 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
92 92 OnDisk(&'on_disk [on_disk::Node]),
93 93 }
94 94
95 95 #[derive(Debug)]
96 96 pub(super) enum NodeRef<'tree, 'on_disk> {
97 97 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
98 98 OnDisk(&'on_disk on_disk::Node),
99 99 }
100 100
101 101 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
102 102 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
103 103 match *self {
104 104 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
105 105 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
106 106 }
107 107 }
108 108 }
109 109
110 110 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
111 111 type Target = HgPath;
112 112
113 113 fn deref(&self) -> &HgPath {
114 114 match *self {
115 115 BorrowedPath::InMemory(in_memory) => in_memory,
116 116 BorrowedPath::OnDisk(on_disk) => on_disk,
117 117 }
118 118 }
119 119 }
120 120
121 121 impl Default for ChildNodes<'_> {
122 122 fn default() -> Self {
123 123 ChildNodes::InMemory(Default::default())
124 124 }
125 125 }
126 126
127 127 impl<'on_disk> ChildNodes<'on_disk> {
128 128 pub(super) fn as_ref<'tree>(
129 129 &'tree self,
130 130 ) -> ChildNodesRef<'tree, 'on_disk> {
131 131 match self {
132 132 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
133 133 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
134 134 }
135 135 }
136 136
137 137 pub(super) fn is_empty(&self) -> bool {
138 138 match self {
139 139 ChildNodes::InMemory(nodes) => nodes.is_empty(),
140 140 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
141 141 }
142 142 }
143 143
144 144 fn make_mut(
145 145 &mut self,
146 146 on_disk: &'on_disk [u8],
147 147 unreachable_bytes: &mut u32,
148 148 ) -> Result<
149 149 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
150 150 DirstateV2ParseError,
151 151 > {
152 152 match self {
153 153 ChildNodes::InMemory(nodes) => Ok(nodes),
154 154 ChildNodes::OnDisk(nodes) => {
155 155 *unreachable_bytes +=
156 156 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
157 157 let nodes = nodes
158 158 .iter()
159 159 .map(|node| {
160 160 Ok((
161 161 node.path(on_disk)?,
162 162 node.to_in_memory_node(on_disk)?,
163 163 ))
164 164 })
165 165 .collect::<Result<_, _>>()?;
166 166 *self = ChildNodes::InMemory(nodes);
167 167 match self {
168 168 ChildNodes::InMemory(nodes) => Ok(nodes),
169 169 ChildNodes::OnDisk(_) => unreachable!(),
170 170 }
171 171 }
172 172 }
173 173 }
174 174 }
175 175
176 176 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
177 177 pub(super) fn get(
178 178 &self,
179 179 base_name: &HgPath,
180 180 on_disk: &'on_disk [u8],
181 181 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
182 182 match self {
183 183 ChildNodesRef::InMemory(nodes) => Ok(nodes
184 184 .get_key_value(base_name)
185 185 .map(|(k, v)| NodeRef::InMemory(k, v))),
186 186 ChildNodesRef::OnDisk(nodes) => {
187 187 let mut parse_result = Ok(());
188 188 let search_result = nodes.binary_search_by(|node| {
189 189 match node.base_name(on_disk) {
190 190 Ok(node_base_name) => node_base_name.cmp(base_name),
191 191 Err(e) => {
192 192 parse_result = Err(e);
193 193 // Dummy comparison result, `search_result` won’t
194 194 // be used since `parse_result` is an error
195 195 std::cmp::Ordering::Equal
196 196 }
197 197 }
198 198 });
199 199 parse_result.map(|()| {
200 200 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
201 201 })
202 202 }
203 203 }
204 204 }
205 205
206 206 /// Iterate in undefined order
207 207 pub(super) fn iter(
208 208 &self,
209 209 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
210 210 match self {
211 211 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
212 212 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
213 213 ),
214 214 ChildNodesRef::OnDisk(nodes) => {
215 215 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
216 216 }
217 217 }
218 218 }
219 219
220 220 /// Iterate in parallel in undefined order
221 221 pub(super) fn par_iter(
222 222 &self,
223 223 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
224 224 {
225 225 use rayon::prelude::*;
226 226 match self {
227 227 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
228 228 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
229 229 ),
230 230 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
231 231 nodes.par_iter().map(NodeRef::OnDisk),
232 232 ),
233 233 }
234 234 }
235 235
236 236 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
237 237 match self {
238 238 ChildNodesRef::InMemory(nodes) => {
239 239 let mut vec: Vec<_> = nodes
240 240 .iter()
241 241 .map(|(k, v)| NodeRef::InMemory(k, v))
242 242 .collect();
243 243 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
244 244 match node {
245 245 NodeRef::InMemory(path, _node) => path.base_name(),
246 246 NodeRef::OnDisk(_) => unreachable!(),
247 247 }
248 248 }
249 249 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
250 250 // value: https://github.com/rust-lang/rust/issues/34162
251 251 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
252 252 vec
253 253 }
254 254 ChildNodesRef::OnDisk(nodes) => {
255 255 // Nodes on disk are already sorted
256 256 nodes.iter().map(NodeRef::OnDisk).collect()
257 257 }
258 258 }
259 259 }
260 260 }
261 261
262 262 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
263 263 pub(super) fn full_path(
264 264 &self,
265 265 on_disk: &'on_disk [u8],
266 266 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
267 267 match self {
268 268 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
269 269 NodeRef::OnDisk(node) => node.full_path(on_disk),
270 270 }
271 271 }
272 272
273 273 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
274 274 /// HgPath>` detached from `'tree`
275 275 pub(super) fn full_path_borrowed(
276 276 &self,
277 277 on_disk: &'on_disk [u8],
278 278 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
279 279 match self {
280 280 NodeRef::InMemory(path, _node) => match path.full_path() {
281 281 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
282 282 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
283 283 },
284 284 NodeRef::OnDisk(node) => {
285 285 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
286 286 }
287 287 }
288 288 }
289 289
290 290 pub(super) fn base_name(
291 291 &self,
292 292 on_disk: &'on_disk [u8],
293 293 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
294 294 match self {
295 295 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
296 296 NodeRef::OnDisk(node) => node.base_name(on_disk),
297 297 }
298 298 }
299 299
300 300 pub(super) fn children(
301 301 &self,
302 302 on_disk: &'on_disk [u8],
303 303 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
304 304 match self {
305 305 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
306 306 NodeRef::OnDisk(node) => {
307 307 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
308 308 }
309 309 }
310 310 }
311 311
312 312 pub(super) fn has_copy_source(&self) -> bool {
313 313 match self {
314 314 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
315 315 NodeRef::OnDisk(node) => node.has_copy_source(),
316 316 }
317 317 }
318 318
319 319 pub(super) fn copy_source(
320 320 &self,
321 321 on_disk: &'on_disk [u8],
322 322 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
323 323 match self {
324 324 NodeRef::InMemory(_path, node) => {
325 325 Ok(node.copy_source.as_ref().map(|s| &**s))
326 326 }
327 327 NodeRef::OnDisk(node) => node.copy_source(on_disk),
328 328 }
329 329 }
330 330 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
331 331 /// HgPath>` detached from `'tree`
332 332 pub(super) fn copy_source_borrowed(
333 333 &self,
334 334 on_disk: &'on_disk [u8],
335 335 ) -> Result<Option<BorrowedPath<'tree, 'on_disk>>, DirstateV2ParseError>
336 336 {
337 337 Ok(match self {
338 338 NodeRef::InMemory(_path, node) => {
339 339 node.copy_source.as_ref().map(|source| match source {
340 340 Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk),
341 341 Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory),
342 342 })
343 343 }
344 344 NodeRef::OnDisk(node) => node
345 345 .copy_source(on_disk)?
346 346 .map(|source| BorrowedPath::OnDisk(source)),
347 347 })
348 348 }
349 349
350 350 pub(super) fn entry(
351 351 &self,
352 352 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
353 353 match self {
354 354 NodeRef::InMemory(_path, node) => {
355 355 Ok(node.data.as_entry().copied())
356 356 }
357 357 NodeRef::OnDisk(node) => node.entry(),
358 358 }
359 359 }
360 360
361 361 pub(super) fn cached_directory_mtime(
362 362 &self,
363 363 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
364 364 match self {
365 365 NodeRef::InMemory(_path, node) => Ok(match node.data {
366 366 NodeData::CachedDirectory { mtime } => Some(mtime),
367 367 _ => None,
368 368 }),
369 369 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
370 370 }
371 371 }
372 372
373 373 pub(super) fn descendants_with_entry_count(&self) -> u32 {
374 374 match self {
375 375 NodeRef::InMemory(_path, node) => {
376 376 node.descendants_with_entry_count
377 377 }
378 378 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
379 379 }
380 380 }
381 381
382 382 pub(super) fn tracked_descendants_count(&self) -> u32 {
383 383 match self {
384 384 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
385 385 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
386 386 }
387 387 }
388 388 }
389 389
390 390 /// Represents a file or a directory
391 391 #[derive(Default, Debug)]
392 392 pub(super) struct Node<'on_disk> {
393 393 pub(super) data: NodeData,
394 394
395 395 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
396 396
397 397 pub(super) children: ChildNodes<'on_disk>,
398 398
399 399 /// How many (non-inclusive) descendants of this node have an entry.
400 400 pub(super) descendants_with_entry_count: u32,
401 401
402 402 /// How many (non-inclusive) descendants of this node have an entry whose
403 403 /// state is "tracked".
404 404 pub(super) tracked_descendants_count: u32,
405 405 }
406 406
/// The payload of a `Node`.
#[derive(Debug)]
pub(super) enum NodeData {
    /// The node has a dirstate entry.
    Entry(DirstateEntry),
    /// The node has no entry but carries a cached directory mtime.
    CachedDirectory { mtime: TruncatedTimestamp },
    /// The node carries neither an entry nor a cached mtime.
    None,
}
413 413
414 414 impl Default for NodeData {
415 415 fn default() -> Self {
416 416 NodeData::None
417 417 }
418 418 }
419 419
420 420 impl NodeData {
421 421 fn has_entry(&self) -> bool {
422 422 match self {
423 423 NodeData::Entry(_) => true,
424 424 _ => false,
425 425 }
426 426 }
427 427
428 428 fn as_entry(&self) -> Option<&DirstateEntry> {
429 429 match self {
430 430 NodeData::Entry(entry) => Some(entry),
431 431 _ => None,
432 432 }
433 433 }
434 434
435 435 fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> {
436 436 match self {
437 437 NodeData::Entry(entry) => Some(entry),
438 438 _ => None,
439 439 }
440 440 }
441 441 }
442 442
443 443 impl<'on_disk> DirstateMap<'on_disk> {
444 444 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
445 445 Self {
446 446 on_disk,
447 447 root: ChildNodes::default(),
448 448 nodes_with_entry_count: 0,
449 449 nodes_with_copy_source_count: 0,
450 450 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
451 451 unreachable_bytes: 0,
452 452 old_data_size: 0,
453 453 dirstate_version: DirstateVersion::V1,
454 454 }
455 455 }
456 456
457 457 #[timed]
458 458 pub fn new_v2(
459 459 on_disk: &'on_disk [u8],
460 460 data_size: usize,
461 461 metadata: &[u8],
462 462 ) -> Result<Self, DirstateError> {
463 463 if let Some(data) = on_disk.get(..data_size) {
464 464 Ok(on_disk::read(data, metadata)?)
465 465 } else {
466 Err(DirstateV2ParseError.into())
466 Err(DirstateV2ParseError::new("not enough bytes on disk").into())
467 467 }
468 468 }
469 469
/// Builds a map by parsing dirstate-v1 data, and returns it together with
/// the parents read from that data.
///
/// If `on_disk` is empty, an empty map and `None` for the parents are
/// returned without parsing anything.
///
/// # Panics
///
/// Panics if the parsed data yields a node that already has an entry or a
/// copy source ("duplicate dirstate entry in read").
#[timed]
pub fn new_v1(
    on_disk: &'on_disk [u8],
) -> Result<(Self, Option<DirstateParents>), DirstateError> {
    let mut map = Self::empty(on_disk);
    if map.on_disk.is_empty() {
        return Ok((map, None));
    }

    let parents = parse_dirstate_entries(
        map.on_disk,
        |path, entry, copy_source| {
            let tracked = entry.tracked();
            let node = Self::get_or_insert_node_inner(
                map.on_disk,
                &mut map.unreachable_bytes,
                &mut map.root,
                path,
                // Paths are borrowed rather than copied
                WithBasename::to_cow_borrowed,
                // Keep the counters of every ancestor in sync with the
                // entry being inserted
                |ancestor| {
                    if tracked {
                        ancestor.tracked_descendants_count += 1
                    }
                    ancestor.descendants_with_entry_count += 1
                },
            )?;
            assert!(
                !node.data.has_entry(),
                "duplicate dirstate entry in read"
            );
            assert!(
                node.copy_source.is_none(),
                "duplicate dirstate entry in read"
            );
            node.data = NodeData::Entry(*entry);
            node.copy_source = copy_source.map(Cow::Borrowed);
            map.nodes_with_entry_count += 1;
            if copy_source.is_some() {
                map.nodes_with_copy_source_count += 1
            }
            Ok(())
        },
    )?;
    let parents = Some(parents.clone());

    Ok((map, parents))
}
517 517
518 518 /// Assuming dirstate-v2 format, returns whether the next write should
519 519 /// append to the existing data file that contains `self.on_disk` (true),
520 520 /// or create a new data file from scratch (false).
521 521 pub(super) fn write_should_append(&self) -> bool {
522 522 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
523 523 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
524 524 }
525 525
526 526 fn get_node<'tree>(
527 527 &'tree self,
528 528 path: &HgPath,
529 529 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
530 530 let mut children = self.root.as_ref();
531 531 let mut components = path.components();
532 532 let mut component =
533 533 components.next().expect("expected at least one components");
534 534 loop {
535 535 if let Some(child) = children.get(component, self.on_disk)? {
536 536 if let Some(next_component) = components.next() {
537 537 component = next_component;
538 538 children = child.children(self.on_disk)?;
539 539 } else {
540 540 return Ok(Some(child));
541 541 }
542 542 } else {
543 543 return Ok(None);
544 544 }
545 545 }
546 546 }
547 547
/// Returns a mutable reference to the node at `path` if it exists
///
/// `each_ancestor` is a callback that is called for each ancestor node
/// when descending the tree. It is used to keep the different counters
/// of the `DirstateMap` up-to-date.
///
/// This is a thin wrapper that forwards the relevant fields of `self` to
/// `get_node_mut_inner`.
fn get_node_mut<'tree>(
    &'tree mut self,
    path: &HgPath,
    each_ancestor: impl FnMut(&mut Node),
) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
    Self::get_node_mut_inner(
        self.on_disk,
        &mut self.unreachable_bytes,
        &mut self.root,
        path,
        each_ancestor,
    )
}
566 566
/// Lower-level version of `get_node_mut`.
///
/// This takes `root` instead of `&mut self` so that callers can mutate
/// other fields while the returned borrow is still valid.
///
/// `each_ancestor` is a callback that is called for each ancestor node
/// when descending the tree. It is used to keep the different counters
/// of the `DirstateMap` up-to-date.
///
/// Note that `each_ancestor` runs while descending: ancestors that were
/// already visited have been passed to it even when a later component is
/// missing and `Ok(None)` is returned.
///
/// # Panics
///
/// Panics if `path` has no components.
fn get_node_mut_inner<'tree>(
    on_disk: &'on_disk [u8],
    unreachable_bytes: &mut u32,
    root: &'tree mut ChildNodes<'on_disk>,
    path: &HgPath,
    mut each_ancestor: impl FnMut(&mut Node),
) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
    let mut children = root;
    let mut components = path.components();
    let mut component =
        components.next().expect("expected at least one components");
    loop {
        // NOTE(review): `make_mut` presumably converts on-disk children to
        // their mutable in-memory form; confirm against `ChildNodes`.
        if let Some(child) = children
            .make_mut(on_disk, unreachable_bytes)?
            .get_mut(component)
        {
            if let Some(next_component) = components.next() {
                // `child` is a strict ancestor of the target node
                each_ancestor(child);
                component = next_component;
                children = &mut child.children;
            } else {
                // Last component: `child` is the target node
                return Ok(Some(child));
            }
        } else {
            return Ok(None);
        }
    }
}
603 603
/// Get a mutable reference to the node at `path`, creating it if it does
/// not exist.
///
/// `each_ancestor` is a callback that is called for each ancestor node
/// when descending the tree. It is used to keep the different counters
/// of the `DirstateMap` up-to-date.
///
/// This forwards to `get_or_insert_node_inner`, using
/// `WithBasename::to_cow_owned` to build the keys of newly created nodes.
fn get_or_insert_node<'tree, 'path>(
    &'tree mut self,
    path: &'path HgPath,
    each_ancestor: impl FnMut(&mut Node),
) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
    Self::get_or_insert_node_inner(
        self.on_disk,
        &mut self.unreachable_bytes,
        &mut self.root,
        path,
        WithBasename::to_cow_owned,
        each_ancestor,
    )
}
624 624
/// Lower-level version of `get_or_insert_node`, which is used when
/// parsing disk data to remove allocations for new nodes.
///
/// `to_cow` builds the key stored for a node that has to be created.
/// `each_ancestor` is called on every strict ancestor of the node at
/// `path` while descending, whether that ancestor already existed or was
/// just created.
///
/// # Panics
///
/// Panics if `path` yields no inclusive ancestor.
fn get_or_insert_node_inner<'tree, 'path>(
    on_disk: &'on_disk [u8],
    unreachable_bytes: &mut u32,
    root: &'tree mut ChildNodes<'on_disk>,
    path: &'path HgPath,
    to_cow: impl Fn(
        WithBasename<&'path HgPath>,
    ) -> WithBasename<Cow<'on_disk, HgPath>>,
    mut each_ancestor: impl FnMut(&mut Node),
) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
    let mut child_nodes = root;
    let mut inclusive_ancestor_paths =
        WithBasename::inclusive_ancestors_of(path);
    let mut ancestor_path = inclusive_ancestor_paths
        .next()
        .expect("expected at least one inclusive ancestor");
    loop {
        // Look the child up by base name, inserting a default node under
        // the key built by `to_cow` if there is none yet
        let (_, child_node) = child_nodes
            .make_mut(on_disk, unreachable_bytes)?
            .raw_entry_mut()
            .from_key(ancestor_path.base_name())
            .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
        if let Some(next) = inclusive_ancestor_paths.next() {
            each_ancestor(child_node);
            ancestor_path = next;
            child_nodes = &mut child_node.children;
        } else {
            // No path left: this is the node for `path` itself
            return Ok(child_node);
        }
    }
}
658 658
659 659 fn reset_state(
660 660 &mut self,
661 661 filename: &HgPath,
662 662 old_entry_opt: Option<DirstateEntry>,
663 663 wc_tracked: bool,
664 664 p1_tracked: bool,
665 665 p2_info: bool,
666 666 has_meaningful_mtime: bool,
667 667 parent_file_data_opt: Option<ParentFileData>,
668 668 ) -> Result<(), DirstateError> {
669 669 let (had_entry, was_tracked) = match old_entry_opt {
670 670 Some(old_entry) => (true, old_entry.tracked()),
671 671 None => (false, false),
672 672 };
673 673 let node = self.get_or_insert_node(filename, |ancestor| {
674 674 if !had_entry {
675 675 ancestor.descendants_with_entry_count += 1;
676 676 }
677 677 if was_tracked {
678 678 if !wc_tracked {
679 679 ancestor.tracked_descendants_count = ancestor
680 680 .tracked_descendants_count
681 681 .checked_sub(1)
682 682 .expect("tracked count to be >= 0");
683 683 }
684 684 } else {
685 685 if wc_tracked {
686 686 ancestor.tracked_descendants_count += 1;
687 687 }
688 688 }
689 689 })?;
690 690
691 691 let v2_data = if let Some(parent_file_data) = parent_file_data_opt {
692 692 DirstateV2Data {
693 693 wc_tracked,
694 694 p1_tracked,
695 695 p2_info,
696 696 mode_size: parent_file_data.mode_size,
697 697 mtime: if has_meaningful_mtime {
698 698 parent_file_data.mtime
699 699 } else {
700 700 None
701 701 },
702 702 ..Default::default()
703 703 }
704 704 } else {
705 705 DirstateV2Data {
706 706 wc_tracked,
707 707 p1_tracked,
708 708 p2_info,
709 709 ..Default::default()
710 710 }
711 711 };
712 712 node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data));
713 713 if !had_entry {
714 714 self.nodes_with_entry_count += 1;
715 715 }
716 716 Ok(())
717 717 }
718 718
719 719 fn set_tracked(
720 720 &mut self,
721 721 filename: &HgPath,
722 722 old_entry_opt: Option<DirstateEntry>,
723 723 ) -> Result<bool, DirstateV2ParseError> {
724 724 let was_tracked = old_entry_opt.map_or(false, |e| e.tracked());
725 725 let had_entry = old_entry_opt.is_some();
726 726 let tracked_count_increment = if was_tracked { 0 } else { 1 };
727 727 let mut new = false;
728 728
729 729 let node = self.get_or_insert_node(filename, |ancestor| {
730 730 if !had_entry {
731 731 ancestor.descendants_with_entry_count += 1;
732 732 }
733 733
734 734 ancestor.tracked_descendants_count += tracked_count_increment;
735 735 })?;
736 736 if let Some(old_entry) = old_entry_opt {
737 737 let mut e = old_entry.clone();
738 738 if e.tracked() {
739 739 // XXX
740 740 // This is probably overkill for more case, but we need this to
741 741 // fully replace the `normallookup` call with `set_tracked`
742 742 // one. Consider smoothing this in the future.
743 743 e.set_possibly_dirty();
744 744 } else {
745 745 new = true;
746 746 e.set_tracked();
747 747 }
748 748 node.data = NodeData::Entry(e)
749 749 } else {
750 750 node.data = NodeData::Entry(DirstateEntry::new_tracked());
751 751 self.nodes_with_entry_count += 1;
752 752 new = true;
753 753 };
754 754 Ok(new)
755 755 }
756 756
757 757 /// Set a node as untracked in the dirstate.
758 758 ///
759 759 /// It is the responsibility of the caller to remove the copy source and/or
760 760 /// the entry itself if appropriate.
761 761 ///
762 762 /// # Panics
763 763 ///
764 764 /// Panics if the node does not exist.
765 765 fn set_untracked(
766 766 &mut self,
767 767 filename: &HgPath,
768 768 old_entry: DirstateEntry,
769 769 ) -> Result<(), DirstateV2ParseError> {
770 770 let node = self
771 771 .get_node_mut(filename, |ancestor| {
772 772 ancestor.tracked_descendants_count = ancestor
773 773 .tracked_descendants_count
774 774 .checked_sub(1)
775 775 .expect("tracked_descendants_count should be >= 0");
776 776 })?
777 777 .expect("node should exist");
778 778 let mut new_entry = old_entry.clone();
779 779 new_entry.set_untracked();
780 780 node.data = NodeData::Entry(new_entry);
781 781 Ok(())
782 782 }
783 783
784 784 /// Set a node as clean in the dirstate.
785 785 ///
786 786 /// It is the responsibility of the caller to remove the copy source.
787 787 ///
788 788 /// # Panics
789 789 ///
790 790 /// Panics if the node does not exist.
791 791 fn set_clean(
792 792 &mut self,
793 793 filename: &HgPath,
794 794 old_entry: DirstateEntry,
795 795 mode: u32,
796 796 size: u32,
797 797 mtime: TruncatedTimestamp,
798 798 ) -> Result<(), DirstateError> {
799 799 let node = self
800 800 .get_node_mut(filename, |ancestor| {
801 801 if !old_entry.tracked() {
802 802 ancestor.tracked_descendants_count += 1;
803 803 }
804 804 })?
805 805 .expect("node should exist");
806 806 let mut new_entry = old_entry.clone();
807 807 new_entry.set_clean(mode, size, mtime);
808 808 node.data = NodeData::Entry(new_entry);
809 809 Ok(())
810 810 }
811 811
812 812 /// Set a node as possibly dirty in the dirstate.
813 813 ///
814 814 /// # Panics
815 815 ///
816 816 /// Panics if the node does not exist.
817 817 fn set_possibly_dirty(
818 818 &mut self,
819 819 filename: &HgPath,
820 820 ) -> Result<(), DirstateError> {
821 821 let node = self
822 822 .get_node_mut(filename, |_ancestor| {})?
823 823 .expect("node should exist");
824 824 let entry = node.data.as_entry_mut().expect("entry should exist");
825 825 entry.set_possibly_dirty();
826 826 node.data = NodeData::Entry(*entry);
827 827 Ok(())
828 828 }
829 829
830 830 /// Clears the cached mtime for the (potential) folder at `path`.
831 831 pub(super) fn clear_cached_mtime(
832 832 &mut self,
833 833 path: &HgPath,
834 834 ) -> Result<(), DirstateV2ParseError> {
835 835 let node = match self.get_node_mut(path, |_ancestor| {})? {
836 836 Some(node) => node,
837 837 None => return Ok(()),
838 838 };
839 839 if let NodeData::CachedDirectory { .. } = &node.data {
840 840 node.data = NodeData::None
841 841 }
842 842 Ok(())
843 843 }
844 844
845 845 /// Sets the cached mtime for the (potential) folder at `path`.
846 846 pub(super) fn set_cached_mtime(
847 847 &mut self,
848 848 path: &HgPath,
849 849 mtime: TruncatedTimestamp,
850 850 ) -> Result<(), DirstateV2ParseError> {
851 851 let node = match self.get_node_mut(path, |_ancestor| {})? {
852 852 Some(node) => node,
853 853 None => return Ok(()),
854 854 };
855 855 match &node.data {
856 856 NodeData::Entry(_) => {} // Don’t overwrite an entry
857 857 NodeData::CachedDirectory { .. } | NodeData::None => {
858 858 node.data = NodeData::CachedDirectory { mtime }
859 859 }
860 860 }
861 861 Ok(())
862 862 }
863 863
/// Returns an iterator over every node of the tree.
///
/// A node is yielded after all of its descendants. If reading the
/// children of a node fails, the error is yielded in place of that node.
fn iter_nodes<'tree>(
    &'tree self,
) -> impl Iterator<
    Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
> + 'tree {
    // Depth first tree traversal.
    //
    // If we could afford internal iteration and recursion,
    // this would look like:
    //
    // ```
    // fn traverse_children(
    //     children: &ChildNodes,
    //     each: &mut impl FnMut(&Node),
    // ) {
    //     for child in children.values() {
    //         traverse_children(&child.children, each);
    //         each(child);
    //     }
    // }
    // ```
    //
    // However we want an external iterator and therefore can’t use the
    // call stack. Use an explicit stack instead:
    let mut stack = Vec::new();
    let mut iter = self.root.as_ref().iter();
    std::iter::from_fn(move || {
        // `iter` is replaced inside the loop body, so this cannot be
        // written as a `for` loop.
        while let Some(child_node) = iter.next() {
            let children = match child_node.children(self.on_disk) {
                Ok(children) => children,
                Err(error) => return Some(Err(error)),
            };
            // Pseudo-recursion
            let new_iter = children.iter();
            let old_iter = std::mem::replace(&mut iter, new_iter);
            stack.push((child_node, old_iter));
        }
        // Found the end of a `children.iter()` iterator.
        if let Some((child_node, next_iter)) = stack.pop() {
            // "Return" from pseudo-recursion by restoring state from the
            // explicit stack
            iter = next_iter;

            Some(Ok(child_node))
        } else {
            // Reached the bottom of the stack, we’re done
            None
        }
    })
}
914 914
915 915 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
916 916 if let Cow::Borrowed(path) = path {
917 917 *unreachable_bytes += path.len() as u32
918 918 }
919 919 }
920 920 }
921 921
/// A fallible counterpart of `Iterator::filter_map`, working on an iterator
/// of `Result`s.
///
/// `f` is only invoked on incoming `Ok` values; incoming errors are
/// forwarded untouched. So that it can use the `?` operator, `f` returns a
/// `Result` of `Option` rather than an `Option` of `Result`: `Ok(None)`
/// drops the item, `Ok(Some(b))` yields `Ok(b)` and `Err(e)` yields
/// `Err(e)`.
fn filter_map_results<'a, I, F, A, B, E>(
    iter: I,
    f: F,
) -> impl Iterator<Item = Result<B, E>> + 'a
where
    I: Iterator<Item = Result<A, E>> + 'a,
    F: Fn(A) -> Result<Option<B>, E> + 'a,
{
    iter.filter_map(move |item| {
        let value = match item {
            Ok(value) => value,
            Err(error) => return Some(Err(error)),
        };
        f(value).transpose()
    })
}
941 941
942 942 impl OwningDirstateMap {
943 943 pub fn clear(&mut self) {
944 944 self.with_dmap_mut(|map| {
945 945 map.root = Default::default();
946 946 map.nodes_with_entry_count = 0;
947 947 map.nodes_with_copy_source_count = 0;
948 948 });
949 949 }
950 950
951 951 pub fn set_tracked(
952 952 &mut self,
953 953 filename: &HgPath,
954 954 ) -> Result<bool, DirstateV2ParseError> {
955 955 let old_entry_opt = self.get(filename)?;
956 956 self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt))
957 957 }
958 958
959 959 pub fn set_untracked(
960 960 &mut self,
961 961 filename: &HgPath,
962 962 ) -> Result<bool, DirstateError> {
963 963 let old_entry_opt = self.get(filename)?;
964 964 match old_entry_opt {
965 965 None => Ok(false),
966 966 Some(old_entry) => {
967 967 if !old_entry.tracked() {
968 968 // `DirstateMap::set_untracked` is not a noop if
969 969 // already not tracked as it will decrement the
970 970 // tracked counters while going down.
971 971 return Ok(true);
972 972 }
973 973 if old_entry.added() {
974 974 // Untracking an "added" entry will just result in a
975 975 // worthless entry (and other parts of the code will
976 976 // complain about it), just drop it entirely.
977 977 self.drop_entry_and_copy_source(filename)?;
978 978 return Ok(true);
979 979 }
980 980 if !old_entry.p2_info() {
981 981 self.copy_map_remove(filename)?;
982 982 }
983 983
984 984 self.with_dmap_mut(|map| {
985 985 map.set_untracked(filename, old_entry)?;
986 986 Ok(true)
987 987 })
988 988 }
989 989 }
990 990 }
991 991
992 992 pub fn set_clean(
993 993 &mut self,
994 994 filename: &HgPath,
995 995 mode: u32,
996 996 size: u32,
997 997 mtime: TruncatedTimestamp,
998 998 ) -> Result<(), DirstateError> {
999 999 let old_entry = match self.get(filename)? {
1000 1000 None => {
1001 1001 return Err(
1002 1002 DirstateMapError::PathNotFound(filename.into()).into()
1003 1003 )
1004 1004 }
1005 1005 Some(e) => e,
1006 1006 };
1007 1007 self.copy_map_remove(filename)?;
1008 1008 self.with_dmap_mut(|map| {
1009 1009 map.set_clean(filename, old_entry, mode, size, mtime)
1010 1010 })
1011 1011 }
1012 1012
1013 1013 pub fn set_possibly_dirty(
1014 1014 &mut self,
1015 1015 filename: &HgPath,
1016 1016 ) -> Result<(), DirstateError> {
1017 1017 if self.get(filename)?.is_none() {
1018 1018 return Err(DirstateMapError::PathNotFound(filename.into()).into());
1019 1019 }
1020 1020 self.with_dmap_mut(|map| map.set_possibly_dirty(filename))
1021 1021 }
1022 1022
1023 1023 pub fn reset_state(
1024 1024 &mut self,
1025 1025 filename: &HgPath,
1026 1026 wc_tracked: bool,
1027 1027 p1_tracked: bool,
1028 1028 p2_info: bool,
1029 1029 has_meaningful_mtime: bool,
1030 1030 parent_file_data_opt: Option<ParentFileData>,
1031 1031 ) -> Result<(), DirstateError> {
1032 1032 if !(p1_tracked || p2_info || wc_tracked) {
1033 1033 self.drop_entry_and_copy_source(filename)?;
1034 1034 return Ok(());
1035 1035 }
1036 1036 self.copy_map_remove(filename)?;
1037 1037 let old_entry_opt = self.get(filename)?;
1038 1038 self.with_dmap_mut(|map| {
1039 1039 map.reset_state(
1040 1040 filename,
1041 1041 old_entry_opt,
1042 1042 wc_tracked,
1043 1043 p1_tracked,
1044 1044 p2_info,
1045 1045 has_meaningful_mtime,
1046 1046 parent_file_data_opt,
1047 1047 )
1048 1048 })
1049 1049 }
1050 1050
/// Removes the entry and the copy source stored at `filename`, if any.
///
/// Nodes that become empty (no entry, no copy source, no children) are
/// removed from the tree, and the counters of the ancestors and of the
/// map are updated accordingly.
///
/// # Panics
///
/// Panics if one of the counters to decrement is already zero.
pub fn drop_entry_and_copy_source(
    &mut self,
    filename: &HgPath,
) -> Result<(), DirstateError> {
    // Only used by the `debug_assert!` at the end
    let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked());
    /// What was found (and removed) on the leaf node at `filename`
    struct Dropped {
        was_tracked: bool,
        had_entry: bool,
        had_copy_source: bool,
    }

    /// If this returns `Ok(Some((dropped, removed)))`, then
    ///
    /// * `dropped` is about the leaf node that was at `filename`
    /// * `removed` is whether this particular level of recursion just
    ///   removed a node in `nodes`.
    ///
    /// `Ok(None)` means that no node exists at `path`.
    fn recur<'on_disk>(
        on_disk: &'on_disk [u8],
        unreachable_bytes: &mut u32,
        nodes: &mut ChildNodes<'on_disk>,
        path: &HgPath,
    ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
        let (first_path_component, rest_of_path) =
            path.split_first_component();
        let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
        let node = if let Some(node) = nodes.get_mut(first_path_component)
        {
            node
        } else {
            return Ok(None);
        };
        let dropped;
        if let Some(rest) = rest_of_path {
            // `node` is an ancestor of the target: recurse, then fix its
            // counters from what was dropped below
            if let Some((d, removed)) = recur(
                on_disk,
                unreachable_bytes,
                &mut node.children,
                rest,
            )? {
                dropped = d;
                if dropped.had_entry {
                    node.descendants_with_entry_count = node
                        .descendants_with_entry_count
                        .checked_sub(1)
                        .expect(
                            "descendants_with_entry_count should be >= 0",
                        );
                }
                if dropped.was_tracked {
                    node.tracked_descendants_count = node
                        .tracked_descendants_count
                        .checked_sub(1)
                        .expect(
                            "tracked_descendants_count should be >= 0",
                        );
                }

                // Directory caches must be invalidated when removing a
                // child node
                if removed {
                    if let NodeData::CachedDirectory { .. } = &node.data {
                        node.data = NodeData::None
                    }
                }
            } else {
                return Ok(None);
            }
        } else {
            // `node` is the target: clear its entry and copy source
            let entry = node.data.as_entry();
            let was_tracked = entry.map_or(false, |entry| entry.tracked());
            let had_entry = entry.is_some();
            if had_entry {
                node.data = NodeData::None
            }
            let mut had_copy_source = false;
            if let Some(source) = &node.copy_source {
                DirstateMap::count_dropped_path(unreachable_bytes, source);
                had_copy_source = true;
                node.copy_source = None
            }
            dropped = Dropped {
                was_tracked,
                had_entry,
                had_copy_source,
            };
        }
        // After recursion, for both leaf (rest_of_path is None) nodes and
        // parent nodes, remove a node if it just became empty.
        let remove = !node.data.has_entry()
            && node.copy_source.is_none()
            && node.children.is_empty();
        if remove {
            let (key, _) =
                nodes.remove_entry(first_path_component).unwrap();
            DirstateMap::count_dropped_path(
                unreachable_bytes,
                key.full_path(),
            )
        }
        Ok(Some((dropped, remove)))
    }

    self.with_dmap_mut(|map| {
        if let Some((dropped, _removed)) = recur(
            map.on_disk,
            &mut map.unreachable_bytes,
            &mut map.root,
            filename,
        )? {
            // Keep the map-wide counters in sync with what was dropped
            if dropped.had_entry {
                map.nodes_with_entry_count = map
                    .nodes_with_entry_count
                    .checked_sub(1)
                    .expect("nodes_with_entry_count should be >= 0");
            }
            if dropped.had_copy_source {
                map.nodes_with_copy_source_count = map
                    .nodes_with_copy_source_count
                    .checked_sub(1)
                    .expect("nodes_with_copy_source_count should be >= 0");
            }
        } else {
            debug_assert!(!was_tracked);
        }
        Ok(())
    })
}
1178 1178
1179 1179 pub fn has_tracked_dir(
1180 1180 &mut self,
1181 1181 directory: &HgPath,
1182 1182 ) -> Result<bool, DirstateError> {
1183 1183 self.with_dmap_mut(|map| {
1184 1184 if let Some(node) = map.get_node(directory)? {
1185 1185 // A node without a `DirstateEntry` was created to hold child
1186 1186 // nodes, and is therefore a directory.
1187 1187 let is_dir = node.entry()?.is_none();
1188 1188 Ok(is_dir && node.tracked_descendants_count() > 0)
1189 1189 } else {
1190 1190 Ok(false)
1191 1191 }
1192 1192 })
1193 1193 }
1194 1194
1195 1195 pub fn has_dir(
1196 1196 &mut self,
1197 1197 directory: &HgPath,
1198 1198 ) -> Result<bool, DirstateError> {
1199 1199 self.with_dmap_mut(|map| {
1200 1200 if let Some(node) = map.get_node(directory)? {
1201 1201 // A node without a `DirstateEntry` was created to hold child
1202 1202 // nodes, and is therefore a directory.
1203 1203 let is_dir = node.entry()?.is_none();
1204 1204 Ok(is_dir && node.descendants_with_entry_count() > 0)
1205 1205 } else {
1206 1206 Ok(false)
1207 1207 }
1208 1208 })
1209 1209 }
1210 1210
1211 1211 #[timed]
1212 1212 pub fn pack_v1(
1213 1213 &self,
1214 1214 parents: DirstateParents,
1215 1215 ) -> Result<Vec<u8>, DirstateError> {
1216 1216 let map = self.get_map();
1217 1217 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1218 1218 // reallocations
1219 1219 let mut size = parents.as_bytes().len();
1220 1220 for node in map.iter_nodes() {
1221 1221 let node = node?;
1222 1222 if node.entry()?.is_some() {
1223 1223 size += packed_entry_size(
1224 1224 node.full_path(map.on_disk)?,
1225 1225 node.copy_source(map.on_disk)?,
1226 1226 );
1227 1227 }
1228 1228 }
1229 1229
1230 1230 let mut packed = Vec::with_capacity(size);
1231 1231 packed.extend(parents.as_bytes());
1232 1232
1233 1233 for node in map.iter_nodes() {
1234 1234 let node = node?;
1235 1235 if let Some(entry) = node.entry()? {
1236 1236 pack_entry(
1237 1237 node.full_path(map.on_disk)?,
1238 1238 &entry,
1239 1239 node.copy_source(map.on_disk)?,
1240 1240 &mut packed,
1241 1241 );
1242 1242 }
1243 1243 }
1244 1244 Ok(packed)
1245 1245 }
1246 1246
1247 1247 /// Returns new data and metadata together with whether that data should be
1248 1248 /// appended to the existing data file whose content is at
1249 1249 /// `map.on_disk` (true), instead of written to a new data file
1250 1250 /// (false), and the previous size of data on disk.
1251 1251 #[timed]
1252 1252 pub fn pack_v2(
1253 1253 &self,
1254 1254 can_append: bool,
1255 1255 ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
1256 1256 {
1257 1257 let map = self.get_map();
1258 1258 on_disk::write(map, can_append)
1259 1259 }
1260 1260
1261 1261 /// `callback` allows the caller to process and do something with the
1262 1262 /// results of the status. This is needed to do so efficiently (i.e.
1263 1263 /// without cloning the `DirstateStatus` object with its paths) because
1264 1264 /// we need to borrow from `Self`.
1265 1265 pub fn with_status<R>(
1266 1266 &mut self,
1267 1267 matcher: &(dyn Matcher + Sync),
1268 1268 root_dir: PathBuf,
1269 1269 ignore_files: Vec<PathBuf>,
1270 1270 options: StatusOptions,
1271 1271 callback: impl for<'r> FnOnce(
1272 1272 Result<(DirstateStatus<'r>, Vec<PatternFileWarning>), StatusError>,
1273 1273 ) -> R,
1274 1274 ) -> R {
1275 1275 self.with_dmap_mut(|map| {
1276 1276 callback(super::status::status(
1277 1277 map,
1278 1278 matcher,
1279 1279 root_dir,
1280 1280 ignore_files,
1281 1281 options,
1282 1282 ))
1283 1283 })
1284 1284 }
1285 1285
1286 1286 pub fn copy_map_len(&self) -> usize {
1287 1287 let map = self.get_map();
1288 1288 map.nodes_with_copy_source_count as usize
1289 1289 }
1290 1290
1291 1291 pub fn copy_map_iter(&self) -> CopyMapIter<'_> {
1292 1292 let map = self.get_map();
1293 1293 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1294 1294 Ok(if let Some(source) = node.copy_source(map.on_disk)? {
1295 1295 Some((node.full_path(map.on_disk)?, source))
1296 1296 } else {
1297 1297 None
1298 1298 })
1299 1299 }))
1300 1300 }
1301 1301
1302 1302 pub fn copy_map_contains_key(
1303 1303 &self,
1304 1304 key: &HgPath,
1305 1305 ) -> Result<bool, DirstateV2ParseError> {
1306 1306 let map = self.get_map();
1307 1307 Ok(if let Some(node) = map.get_node(key)? {
1308 1308 node.has_copy_source()
1309 1309 } else {
1310 1310 false
1311 1311 })
1312 1312 }
1313 1313
1314 1314 pub fn copy_map_get(
1315 1315 &self,
1316 1316 key: &HgPath,
1317 1317 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1318 1318 let map = self.get_map();
1319 1319 if let Some(node) = map.get_node(key)? {
1320 1320 if let Some(source) = node.copy_source(map.on_disk)? {
1321 1321 return Ok(Some(source));
1322 1322 }
1323 1323 }
1324 1324 Ok(None)
1325 1325 }
1326 1326
1327 1327 pub fn copy_map_remove(
1328 1328 &mut self,
1329 1329 key: &HgPath,
1330 1330 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1331 1331 self.with_dmap_mut(|map| {
1332 1332 let count = &mut map.nodes_with_copy_source_count;
1333 1333 let unreachable_bytes = &mut map.unreachable_bytes;
1334 1334 Ok(DirstateMap::get_node_mut_inner(
1335 1335 map.on_disk,
1336 1336 unreachable_bytes,
1337 1337 &mut map.root,
1338 1338 key,
1339 1339 |_ancestor| {},
1340 1340 )?
1341 1341 .and_then(|node| {
1342 1342 if let Some(source) = &node.copy_source {
1343 1343 *count = count
1344 1344 .checked_sub(1)
1345 1345 .expect("nodes_with_copy_source_count should be >= 0");
1346 1346 DirstateMap::count_dropped_path(unreachable_bytes, source);
1347 1347 }
1348 1348 node.copy_source.take().map(Cow::into_owned)
1349 1349 }))
1350 1350 })
1351 1351 }
1352 1352
1353 1353 pub fn copy_map_insert(
1354 1354 &mut self,
1355 1355 key: &HgPath,
1356 1356 value: &HgPath,
1357 1357 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1358 1358 self.with_dmap_mut(|map| {
1359 1359 let node = map.get_or_insert_node(&key, |_ancestor| {})?;
1360 1360 let had_copy_source = node.copy_source.is_none();
1361 1361 let old = node
1362 1362 .copy_source
1363 1363 .replace(value.to_owned().into())
1364 1364 .map(Cow::into_owned);
1365 1365 if had_copy_source {
1366 1366 map.nodes_with_copy_source_count += 1
1367 1367 }
1368 1368 Ok(old)
1369 1369 })
1370 1370 }
1371 1371
1372 1372 pub fn len(&self) -> usize {
1373 1373 let map = self.get_map();
1374 1374 map.nodes_with_entry_count as usize
1375 1375 }
1376 1376
1377 1377 pub fn contains_key(
1378 1378 &self,
1379 1379 key: &HgPath,
1380 1380 ) -> Result<bool, DirstateV2ParseError> {
1381 1381 Ok(self.get(key)?.is_some())
1382 1382 }
1383 1383
1384 1384 pub fn get(
1385 1385 &self,
1386 1386 key: &HgPath,
1387 1387 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1388 1388 let map = self.get_map();
1389 1389 Ok(if let Some(node) = map.get_node(key)? {
1390 1390 node.entry()?
1391 1391 } else {
1392 1392 None
1393 1393 })
1394 1394 }
1395 1395
1396 1396 pub fn iter(&self) -> StateMapIter<'_> {
1397 1397 let map = self.get_map();
1398 1398 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1399 1399 Ok(if let Some(entry) = node.entry()? {
1400 1400 Some((node.full_path(map.on_disk)?, entry))
1401 1401 } else {
1402 1402 None
1403 1403 })
1404 1404 }))
1405 1405 }
1406 1406
1407 1407 pub fn iter_tracked_dirs(
1408 1408 &mut self,
1409 1409 ) -> Result<
1410 1410 Box<
1411 1411 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
1412 1412 + Send
1413 1413 + '_,
1414 1414 >,
1415 1415 DirstateError,
1416 1416 > {
1417 1417 let map = self.get_map();
1418 1418 let on_disk = map.on_disk;
1419 1419 Ok(Box::new(filter_map_results(
1420 1420 map.iter_nodes(),
1421 1421 move |node| {
1422 1422 Ok(if node.tracked_descendants_count() > 0 {
1423 1423 Some(node.full_path(on_disk)?)
1424 1424 } else {
1425 1425 None
1426 1426 })
1427 1427 },
1428 1428 )))
1429 1429 }
1430 1430
1431 1431 /// Only public because it needs to be exposed to the Python layer.
1432 1432 /// It is not the full `setparents` logic, only the parts that mutate the
1433 1433 /// entries.
1434 1434 pub fn setparents_fixup(
1435 1435 &mut self,
1436 1436 ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
1437 1437 // XXX
1438 1438 // All the copying and re-querying is quite inefficient, but this is
1439 1439 // still a lot better than doing it from Python.
1440 1440 //
1441 1441 // The better solution is to develop a mechanism for `iter_mut`,
1442 1442 // which will be a lot more involved: we're dealing with a lazy,
1443 1443 // append-mostly, tree-like data structure. This will do for now.
1444 1444 let mut copies = vec![];
1445 1445 let mut files_with_p2_info = vec![];
1446 1446 for res in self.iter() {
1447 1447 let (path, entry) = res?;
1448 1448 if entry.p2_info() {
1449 1449 files_with_p2_info.push(path.to_owned())
1450 1450 }
1451 1451 }
1452 1452 self.with_dmap_mut(|map| {
1453 1453 for path in files_with_p2_info.iter() {
1454 1454 let node = map.get_or_insert_node(path, |_| {})?;
1455 1455 let entry =
1456 1456 node.data.as_entry_mut().expect("entry should exist");
1457 1457 entry.drop_merge_data();
1458 1458 if let Some(source) = node.copy_source.take().as_deref() {
1459 1459 copies.push((path.to_owned(), source.to_owned()));
1460 1460 }
1461 1461 }
1462 1462 Ok(copies)
1463 1463 })
1464 1464 }
1465 1465
1466 1466 pub fn debug_iter(
1467 1467 &self,
1468 1468 all: bool,
1469 1469 ) -> Box<
1470 1470 dyn Iterator<
1471 1471 Item = Result<
1472 1472 (&HgPath, (u8, i32, i32, i32)),
1473 1473 DirstateV2ParseError,
1474 1474 >,
1475 1475 > + Send
1476 1476 + '_,
1477 1477 > {
1478 1478 let map = self.get_map();
1479 1479 Box::new(filter_map_results(map.iter_nodes(), move |node| {
1480 1480 let debug_tuple = if let Some(entry) = node.entry()? {
1481 1481 entry.debug_tuple()
1482 1482 } else if !all {
1483 1483 return Ok(None);
1484 1484 } else if let Some(mtime) = node.cached_directory_mtime()? {
1485 1485 (b' ', 0, -1, mtime.truncated_seconds() as i32)
1486 1486 } else {
1487 1487 (b' ', 0, -1, -1)
1488 1488 };
1489 1489 Ok(Some((node.full_path(map.on_disk)?, debug_tuple)))
1490 1490 }))
1491 1491 }
1492 1492 }
1493 1493 #[cfg(test)]
1494 1494 mod tests {
1495 1495 use super::*;
1496 1496
1497 1497 /// Shortcut to return tracked descendants of a path.
1498 1498 /// Panics if the path does not exist.
1499 1499 fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1500 1500 let path = dbg!(HgPath::new(path));
1501 1501 let node = map.get_map().get_node(path);
1502 1502 node.unwrap().unwrap().tracked_descendants_count()
1503 1503 }
1504 1504
1505 1505 /// Shortcut to return descendants with an entry.
1506 1506 /// Panics if the path does not exist.
1507 1507 fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 {
1508 1508 let path = dbg!(HgPath::new(path));
1509 1509 let node = map.get_map().get_node(path);
1510 1510 node.unwrap().unwrap().descendants_with_entry_count()
1511 1511 }
1512 1512
1513 1513 fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) {
1514 1514 let path = dbg!(HgPath::new(path));
1515 1515 let node = map.get_map().get_node(path);
1516 1516 assert!(node.unwrap().is_none());
1517 1517 }
1518 1518
1519 1519 /// Shortcut for path creation in tests
1520 1520 fn p(b: &[u8]) -> &HgPath {
1521 1521 HgPath::new(b)
1522 1522 }
1523 1523
/// Simplest case: a single tracked file, then untracked again.
#[test]
fn test_tracked_descendants_simple() -> Result<(), DirstateError> {
    let file = p(b"some/nested/path");

    let mut map = OwningDirstateMap::new_empty(vec![]);
    assert_eq!(map.len(), 0);

    map.set_tracked(file)?;

    assert_eq!(map.len(), 1);
    // Every ancestor directory counts the file, the file itself has no
    // descendants.
    assert_eq!(tracked_descendants(&map, b"some"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);

    map.set_untracked(file)?;
    assert_eq!(map.len(), 0);
    // The now-empty directories are gone as well.
    assert!(map.get_map().get_node(p(b"some"))?.is_none());

    Ok(())
}
1543 1543
/// Test the simple case of all tracked, but multiple files
#[test]
fn test_tracked_descendants_multiple() -> Result<(), DirstateError> {
    let mut map = OwningDirstateMap::new_empty(vec![]);

    map.set_tracked(p(b"some/nested/path"))?;
    map.set_tracked(p(b"some/nested/file"))?;
    // one layer without any files to test deletion cascade
    map.set_tracked(p(b"some/other/nested/path"))?;
    map.set_tracked(p(b"root_file"))?;
    map.set_tracked(p(b"some/file"))?;
    map.set_tracked(p(b"some/file2"))?;
    map.set_tracked(p(b"some/file3"))?;

    assert_eq!(map.len(), 7);
    assert_eq!(tracked_descendants(&map, b"some"), 6);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 2);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0);

    map.set_untracked(p(b"some/nested/path"))?;
    assert_eq!(map.len(), 6);
    assert_eq!(tracked_descendants(&map, b"some"), 5);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);

    map.set_untracked(p(b"some/nested/file"))?;
    assert_eq!(map.len(), 5);
    assert_eq!(tracked_descendants(&map, b"some"), 4);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
    // Both files of `some/nested` are gone, so the directory node itself
    // must have been removed.
    assert_does_not_exist(&map, b"some/nested");

    map.set_untracked(p(b"some/other/nested/path"))?;
    assert_eq!(map.len(), 4);
    assert_eq!(tracked_descendants(&map, b"some"), 3);
    // The removal cascades through the file-less intermediate layer.
    assert_does_not_exist(&map, b"some/other");

    map.set_untracked(p(b"root_file"))?;
    assert_eq!(map.len(), 3);
    assert_eq!(tracked_descendants(&map, b"some"), 3);
    assert_does_not_exist(&map, b"root_file");

    map.set_untracked(p(b"some/file"))?;
    assert_eq!(map.len(), 2);
    assert_eq!(tracked_descendants(&map, b"some"), 2);
    assert_does_not_exist(&map, b"some/file");

    map.set_untracked(p(b"some/file2"))?;
    assert_eq!(map.len(), 1);
    assert_eq!(tracked_descendants(&map, b"some"), 1);
    assert_does_not_exist(&map, b"some/file2");

    map.set_untracked(p(b"some/file3"))?;
    assert_eq!(map.len(), 0);
    assert_does_not_exist(&map, b"some/file3");

    Ok(())
}
1605 1605
/// Mixes tracked and non-tracked items and checks both descendant counters.
#[test]
fn test_tracked_descendants_different() -> Result<(), DirstateError> {
    let added = p(b"some/nested/path");
    let removed = p(b"some/nested/file");
    let p2_only = p(b"some/file3");
    let merged = p(b"root_file");
    let added_with_p2 = p(b"some/file2");
    let clean = p(b"some/other/nested/path");

    let mut map = OwningDirstateMap::new_empty(vec![]);

    // A file that was just added
    map.set_tracked(added)?;
    // This has no information, the dirstate should ignore it
    map.reset_state(p(b"some/file"), false, false, false, false, None)?;
    assert_does_not_exist(&map, b"some/file");

    // A file that was removed
    map.reset_state(removed, false, true, false, false, None)?;
    assert!(!map.get(removed)?.unwrap().tracked());
    // Only present in p2
    map.reset_state(p2_only, false, false, true, false, None)?;
    assert!(!map.get(p2_only)?.unwrap().tracked());
    // A file that was merged
    map.reset_state(merged, true, true, true, false, None)?;
    assert!(map.get(merged)?.unwrap().tracked());
    // A file that is added, with info from p2
    // XXX is that actually possible?
    map.reset_state(added_with_p2, true, false, true, false, None)?;
    assert!(map.get(added_with_p2)?.unwrap().tracked());
    // A clean file, below one layer without any files to test the
    // deletion cascade
    map.reset_state(clean, true, true, false, false, None)?;
    assert!(map.get(clean)?.unwrap().tracked());

    assert_eq!(map.len(), 6);
    assert_eq!(tracked_descendants(&map, b"some"), 3);
    assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0);
    assert_eq!(
        descendants_with_an_entry(&map, b"some/other/nested/path"),
        0
    );
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);

    // might as well check this
    map.set_untracked(p(b"path/does/not/exist"))?;
    assert_eq!(map.len(), 6);

    map.set_untracked(clean)?;
    // It is set untracked but not deleted since it held other information
    assert_eq!(map.len(), 6);
    assert_eq!(tracked_descendants(&map, b"some"), 2);
    assert_eq!(descendants_with_an_entry(&map, b"some"), 5);
    assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);

    map.set_untracked(added)?;
    // It is set untracked *and* deleted since it was only added
    assert_eq!(map.len(), 5);
    assert_eq!(tracked_descendants(&map, b"some"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some"), 4);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 0);
    assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1);
    assert_does_not_exist(&map, b"some/nested/path");

    map.set_untracked(merged)?;
    // Untracked but not deleted
    assert_eq!(map.len(), 5);
    assert!(map.get(merged)?.is_some());

    map.set_untracked(added_with_p2)?;
    assert_eq!(map.len(), 5);
    assert_eq!(tracked_descendants(&map, b"some"), 0);
    assert!(map.get(added_with_p2)?.is_some());

    map.set_untracked(p2_only)?;
    assert_eq!(map.len(), 5);
    assert_eq!(tracked_descendants(&map, b"some"), 0);
    assert!(map.get(p2_only)?.is_some());

    Ok(())
}
1702 1702
/// Checks that the copy source counter follows insertions, overrides and
/// the various flavours of `set_untracked`.
#[test]
fn test_copy_source() -> Result<(), DirstateError> {
    let clean = p(b"files/clean");
    let from_p2 = p(b"files/from_p2");
    let removed = p(b"removed");
    let added = p(b"files/added");

    let mut map = OwningDirstateMap::new_empty(vec![]);

    // Clean file
    map.reset_state(clean, true, true, false, false, None)?;
    // Merged file
    map.reset_state(from_p2, true, true, true, false, None)?;
    // Removed file
    map.reset_state(removed, false, true, false, false, None)?;
    // Added file
    map.reset_state(added, true, false, false, false, None)?;

    // Add copy
    map.copy_map_insert(clean, p(b"clean_copy_source"))?;
    assert_eq!(map.copy_map_len(), 1);

    // Copy override
    map.copy_map_insert(clean, p(b"other_clean_copy_source"))?;
    assert_eq!(map.copy_map_len(), 1);

    // Multiple copies
    map.copy_map_insert(removed, p(b"removed_copy_source"))?;
    assert_eq!(map.copy_map_len(), 2);

    map.copy_map_insert(added, p(b"added_copy_source"))?;
    assert_eq!(map.copy_map_len(), 3);

    // Added, so the entry is completely removed
    map.set_untracked(added)?;
    assert_does_not_exist(&map, b"files/added");
    assert_eq!(map.copy_map_len(), 2);

    // Removed, so the entry is kept around, so is its copy
    map.set_untracked(removed)?;
    assert!(map.get(removed)?.is_some());
    assert_eq!(map.copy_map_len(), 2);

    // Clean, so the entry is kept around, but not its copy
    map.set_untracked(clean)?;
    assert!(map.get(clean)?.is_some());
    assert_eq!(map.copy_map_len(), 1);

    map.copy_map_insert(from_p2, p(b"from_p2_copy_source"))?;
    assert_eq!(map.copy_map_len(), 2);

    // Info from p2, so its copy source info is kept around
    map.set_untracked(from_p2)?;
    assert!(map.get(from_p2)?.is_some());
    assert_eq!(map.copy_map_len(), 2);

    Ok(())
}
1756 1756
/// Test with "on disk" data. For the sake of this test, the "on disk" data
/// does not actually come from the disk, but it's opaque to the code being
/// tested.
#[test]
fn test_on_disk() -> Result<(), DirstateError> {
    let added = p(b"some/nested/added");
    let removed = p(b"some/nested/removed");
    let p2_info_only = p(b"other/p2_info_only");
    let merged = p(b"merged");
    let added_with_p2 = p(b"other/added_with_p2");
    let clean = p(b"some/other/nested/clean");

    // First let's create some data to put "on disk"
    let mut map = OwningDirstateMap::new_empty(vec![]);

    // A file that was just added
    map.set_tracked(added)?;
    map.copy_map_insert(added, p(b"added_copy_source"))?;

    // A file that was removed
    map.reset_state(removed, false, true, false, false, None)?;
    // Only present in p2
    map.reset_state(p2_info_only, false, false, true, false, None)?;
    map.copy_map_insert(p2_info_only, p(b"other/p2_info_copy_source"))?;
    // A file that was merged
    map.reset_state(merged, true, true, true, false, None)?;
    // A file that is added, with info from p2
    // XXX is that actually possible?
    map.reset_state(added_with_p2, true, false, true, false, None)?;
    // A clean file, below one layer without any files to test the
    // deletion cascade
    map.reset_state(clean, true, true, false, false, None)?;

    let (packed, metadata, _should_append, _old_data_size) =
        map.pack_v2(false)?;
    let packed_len = packed.len();
    assert!(packed_len > 0);

    // Recreate "from disk"
    let mut map = OwningDirstateMap::new_v2(
        packed,
        packed_len,
        metadata.as_bytes(),
    )?;

    // Check that everything is accounted for
    assert!(map.contains_key(added)?);
    assert!(map.contains_key(removed)?);
    assert!(map.contains_key(merged)?);
    assert!(map.contains_key(p2_info_only)?);
    assert!(map.contains_key(added_with_p2)?);
    assert!(map.contains_key(clean)?);
    assert_eq!(map.copy_map_get(added)?, Some(p(b"added_copy_source")));
    assert_eq!(
        map.copy_map_get(p2_info_only)?,
        Some(p(b"other/p2_info_copy_source"))
    );
    assert_eq!(tracked_descendants(&map, b"some"), 2);
    assert_eq!(descendants_with_an_entry(&map, b"some"), 3);
    assert_eq!(tracked_descendants(&map, b"other"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"other"), 2);
    assert_eq!(tracked_descendants(&map, b"some/other"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1);
    assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1);
    assert_eq!(tracked_descendants(&map, b"some/nested"), 1);
    assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2);
    assert_eq!(map.len(), 6);
    assert_eq!(map.get_map().unreachable_bytes, 0);
    assert_eq!(map.copy_map_len(), 2);

    // Shouldn't change anything since it's already not tracked
    map.set_untracked(removed)?;
    assert_eq!(map.get_map().unreachable_bytes, 0);

    if let ChildNodes::InMemory(_) = map.get_map().root {
        panic!("root should not have been mutated")
    }
    // We haven't mutated enough (nothing, actually), we should still be in
    // the append strategy
    assert!(map.get_map().write_should_append());

    // But this mutates the structure, so there should be unreachable_bytes
    assert!(map.set_untracked(added)?);
    let unreachable_bytes = map.get_map().unreachable_bytes;
    assert!(unreachable_bytes > 0);

    if let ChildNodes::OnDisk(_) = map.get_map().root {
        panic!("root should have been mutated")
    }

    // This should not mutate the structure either, since `root` has
    // already been mutated along with its direct children.
    map.set_untracked(merged)?;
    assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes);

    if let NodeRef::InMemory(_, _) =
        map.get_map().get_node(added_with_p2)?.unwrap()
    {
        panic!("'other/added_with_p2' should not have been mutated")
    }
    // But this should, since it's in a different path
    // than `<root>some/nested/added`
    map.set_untracked(added_with_p2)?;
    assert!(map.get_map().unreachable_bytes > unreachable_bytes);

    if let NodeRef::OnDisk(_) =
        map.get_map().get_node(added_with_p2)?.unwrap()
    {
        panic!("'other/added_with_p2' should have been mutated")
    }

    // We have rewritten most of the tree, we should create a new file
    assert!(!map.get_map().write_should_append());

    Ok(())
}
1907 1907 }
@@ -1,853 +1,878 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 7 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
8 8 use crate::dirstate_tree::path_with_basename::WithBasename;
9 9 use crate::errors::HgError;
10 10 use crate::utils::hg_path::HgPath;
11 11 use crate::DirstateEntry;
12 12 use crate::DirstateError;
13 13 use crate::DirstateParents;
14 14 use bitflags::bitflags;
15 15 use bytes_cast::unaligned::{U16Be, U32Be};
16 16 use bytes_cast::BytesCast;
17 17 use format_bytes::format_bytes;
18 18 use rand::Rng;
19 19 use std::borrow::Cow;
20 20 use std::convert::{TryFrom, TryInto};
21 21 use std::fmt::Write;
22 22
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

/// Length in bytes of the hash of ignore patterns kept in the tree metadata.
pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
/// Fixed-size byte array holding the hash of ignore patterns.
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
const TREE_METADATA_SIZE: usize = 44;
const NODE_SIZE: usize = 44;
40 40
41 41 /// Make sure that size-affecting changes are made knowingly
42 42 #[allow(unused)]
43 43 fn static_assert_size_of() {
44 44 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
45 45 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
46 46 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
47 47 }
48 48
// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
//
// Fixed-size prefix of the docket file. The data file identifier (uuid)
// follows it directly and is not part of this struct.
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    /// Expected to be equal to `V2_FORMAT_MARKER`
    marker: [u8; V2_FORMAT_MARKER.len()],
    /// First parent, padded to `STORED_NODE_ID_BYTES`
    parent_1: [u8; STORED_NODE_ID_BYTES],
    /// Second parent, padded to `STORED_NODE_ID_BYTES`
    parent_2: [u8; STORED_NODE_ID_BYTES],

    metadata: TreeMetadata,

    /// Counted in bytes
    data_size: Size,

    /// Length in bytes of the uuid that follows the header
    uuid_size: u8,
}
64 64
/// A parsed docket: the fixed-size header plus the uuid bytes that follow
/// it, both borrowed from the bytes given to `read_docket`.
pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    /// Identifier of the data file, see `Docket::data_filename`
    pub uuid: &'on_disk [u8],
}
69 69
/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
pub struct TreeMetadata {
    /// Location of the nodes directly under the repository root
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,
    unreachable_bytes: Size,
    /// Always written as zeros by `write`
    unused: [u8; 4],

    /// See *Optional hash of ignore patterns* section of
    /// `mercurial/helptext/internals/dirstate-v2.txt`
    ignore_patterns_hash: IgnorePatternsHash,
}
85 85
/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast, Debug)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    /// No copy source when `copy_source.start` is zero
    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,
    /// Raw bits of `Flags`
    flags: U16Be,
    size: U32Be,
    mtime: PackedTruncatedTimestamp,
}
104 104
// Meaning of each bit of `Node::flags`.
//
// The first three flags decide whether a node carries a dirstate entry
// (see `Node::has_entry`). The others qualify which of the remaining
// fields of the node are meaningful.
bitflags! {
    #[repr(C)]
    struct Flags: u16 {
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        const MODE_EXEC_PERM = 1 << 3;
        const MODE_IS_SYMLINK = 1 << 4;
        const HAS_FALLBACK_EXEC = 1 << 5;
        const FALLBACK_EXEC = 1 << 6;
        const HAS_FALLBACK_SYMLINK = 1 << 7;
        const FALLBACK_SYMLINK = 1 << 8;
        const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
        const HAS_MODE_AND_SIZE = 1 <<10;
        const HAS_MTIME = 1 <<11;
        const MTIME_SECOND_AMBIGUOUS = 1 << 12;
        const DIRECTORY = 1 <<13;
        const ALL_UNKNOWN_RECORDED = 1 <<14;
        const ALL_IGNORED_RECORDED = 1 <<15;
    }
}
126 126
/// Duration since the Unix epoch
///
/// On-disk counterpart of `TruncatedTimestamp`; both fields are stored as
/// big-endian 32-bit integers.
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PackedTruncatedTimestamp {
    truncated_seconds: U32Be,
    nanoseconds: U32Be,
}
134 134
/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
///
/// NOTE(review): this alias is also used for byte counts
/// (`DocketHeader::data_size`, `TreeMetadata::unreachable_bytes`).
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;
149 149
/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct ChildNodes {
    /// Where the first node starts
    start: Offset,
    /// Number of nodes, not number of bytes
    len: Size,
}
162 162
/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PathSlice {
    /// Where the path bytes start
    start: Offset,
    /// Length of the path in bytes
    len: PathSize,
}
170 170
/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
///
/// This is the same type as `PathSlice`; the alias only documents that a
/// zero `start` means "absent" (see `Node::has_copy_source`).
type OptPathSlice = PathSlice;
173 173
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
/// The error carries a human-readable message giving some context about
/// what could not be parsed.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    message: String,
}

impl DirstateV2ParseError {
    /// Builds a parse error from anything that converts into a `String`.
    pub fn new<S>(message: S) -> Self
    where
        S: Into<String>,
    {
        let message = message.into();
        Self { message }
    }
}
179 189
180 190 impl From<DirstateV2ParseError> for HgError {
181 fn from(_: DirstateV2ParseError) -> Self {
182 HgError::corrupted("dirstate-v2 parse error")
191 fn from(e: DirstateV2ParseError) -> Self {
192 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
183 193 }
184 194 }
185 195
186 196 impl From<DirstateV2ParseError> for crate::DirstateError {
187 197 fn from(error: DirstateV2ParseError) -> Self {
188 198 HgError::from(error).into()
189 199 }
190 200 }
191 201
192 202 impl TreeMetadata {
193 203 pub fn as_bytes(&self) -> &[u8] {
194 204 BytesCast::as_bytes(self)
195 205 }
196 206 }
197 207
198 208 impl<'on_disk> Docket<'on_disk> {
199 209 /// Generate the identifier for a new data file
200 210 ///
201 211 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
202 212 /// See `mercurial/revlogutils/docket.py`
203 213 pub fn new_uid() -> String {
204 214 const ID_LENGTH: usize = 8;
205 215 let mut id = String::with_capacity(ID_LENGTH);
206 216 let mut rng = rand::thread_rng();
207 217 for _ in 0..ID_LENGTH {
208 218 // One random hexadecimal digit.
209 219 // `unwrap` never panics because `impl Write for String`
210 220 // never returns an error.
211 221 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
212 222 }
213 223 id
214 224 }
215 225
216 226 pub fn serialize(
217 227 parents: DirstateParents,
218 228 tree_metadata: TreeMetadata,
219 229 data_size: u64,
220 230 uuid: &[u8],
221 231 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
222 232 let header = DocketHeader {
223 233 marker: *V2_FORMAT_MARKER,
224 234 parent_1: parents.p1.pad_to_256_bits(),
225 235 parent_2: parents.p2.pad_to_256_bits(),
226 236 metadata: tree_metadata,
227 237 data_size: u32::try_from(data_size)?.into(),
228 238 uuid_size: uuid.len().try_into()?,
229 239 };
230 240 let header = header.as_bytes();
231 241 let mut docket = Vec::with_capacity(header.len() + uuid.len());
232 242 docket.extend_from_slice(header);
233 243 docket.extend_from_slice(uuid);
234 244 Ok(docket)
235 245 }
236 246
237 247 pub fn parents(&self) -> DirstateParents {
238 248 use crate::Node;
239 249 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
240 250 .unwrap()
241 251 .clone();
242 252 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
243 253 .unwrap()
244 254 .clone();
245 255 DirstateParents { p1, p2 }
246 256 }
247 257
248 258 pub fn tree_metadata(&self) -> &[u8] {
249 259 self.header.metadata.as_bytes()
250 260 }
251 261
252 262 pub fn data_size(&self) -> usize {
253 263 // This `unwrap` could only panic on a 16-bit CPU
254 264 self.header.data_size.get().try_into().unwrap()
255 265 }
256 266
257 267 pub fn data_filename(&self) -> String {
258 268 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
259 269 }
260 270 }
261 271
262 272 pub fn read_docket(
263 273 on_disk: &[u8],
264 274 ) -> Result<Docket<'_>, DirstateV2ParseError> {
265 let (header, uuid) =
266 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
275 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
276 DirstateV2ParseError::new(format!("when reading docket, {}", e))
277 })?;
267 278 let uuid_size = header.uuid_size as usize;
268 279 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
269 280 Ok(Docket { header, uuid })
270 281 } else {
271 Err(DirstateV2ParseError)
282 Err(DirstateV2ParseError::new(
283 "invalid format marker or uuid size",
284 ))
272 285 }
273 286 }
274 287
275 288 pub(super) fn read<'on_disk>(
276 289 on_disk: &'on_disk [u8],
277 290 metadata: &[u8],
278 291 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
279 292 if on_disk.is_empty() {
280 293 let mut map = DirstateMap::empty(on_disk);
281 294 map.dirstate_version = DirstateVersion::V2;
282 295 return Ok(map);
283 296 }
284 let (meta, _) = TreeMetadata::from_bytes(metadata)
285 .map_err(|_| DirstateV2ParseError)?;
297 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
298 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
299 })?;
286 300 let dirstate_map = DirstateMap {
287 301 on_disk,
288 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
289 on_disk,
290 meta.root_nodes,
291 )?),
302 root: dirstate_map::ChildNodes::OnDisk(
303 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
304 e.message = format!("{}, when reading root notes", e.message);
305 e
306 })?,
307 ),
292 308 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
293 309 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
294 310 ignore_patterns_hash: meta.ignore_patterns_hash,
295 311 unreachable_bytes: meta.unreachable_bytes.get(),
296 312 old_data_size: on_disk.len(),
297 313 dirstate_version: DirstateVersion::V2,
298 314 };
299 315 Ok(dirstate_map)
300 316 }
301 317
302 318 impl Node {
303 319 pub(super) fn full_path<'on_disk>(
304 320 &self,
305 321 on_disk: &'on_disk [u8],
306 322 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
307 323 read_hg_path(on_disk, self.full_path)
308 324 }
309 325
310 326 pub(super) fn base_name_start<'on_disk>(
311 327 &self,
312 328 ) -> Result<usize, DirstateV2ParseError> {
313 329 let start = self.base_name_start.get();
314 330 if start < self.full_path.len.get() {
315 331 let start = usize::try_from(start)
316 332 // u32 -> usize, could only panic on a 16-bit CPU
317 333 .expect("dirstate-v2 base_name_start out of bounds");
318 334 Ok(start)
319 335 } else {
320 Err(DirstateV2ParseError)
336 Err(DirstateV2ParseError::new("not enough bytes for base name"))
321 337 }
322 338 }
323 339
324 340 pub(super) fn base_name<'on_disk>(
325 341 &self,
326 342 on_disk: &'on_disk [u8],
327 343 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
328 344 let full_path = self.full_path(on_disk)?;
329 345 let base_name_start = self.base_name_start()?;
330 346 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
331 347 }
332 348
333 349 pub(super) fn path<'on_disk>(
334 350 &self,
335 351 on_disk: &'on_disk [u8],
336 352 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
337 353 Ok(WithBasename::from_raw_parts(
338 354 Cow::Borrowed(self.full_path(on_disk)?),
339 355 self.base_name_start()?,
340 356 ))
341 357 }
342 358
343 359 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
344 360 self.copy_source.start.get() != 0
345 361 }
346 362
347 363 pub(super) fn copy_source<'on_disk>(
348 364 &self,
349 365 on_disk: &'on_disk [u8],
350 366 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
351 367 Ok(if self.has_copy_source() {
352 368 Some(read_hg_path(on_disk, self.copy_source)?)
353 369 } else {
354 370 None
355 371 })
356 372 }
357 373
358 374 fn flags(&self) -> Flags {
359 375 Flags::from_bits_truncate(self.flags.get())
360 376 }
361 377
362 378 fn has_entry(&self) -> bool {
363 379 self.flags().intersects(
364 380 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
365 381 )
366 382 }
367 383
368 384 pub(super) fn node_data(
369 385 &self,
370 386 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
371 387 if self.has_entry() {
372 388 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
373 389 } else if let Some(mtime) = self.cached_directory_mtime()? {
374 390 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
375 391 } else {
376 392 Ok(dirstate_map::NodeData::None)
377 393 }
378 394 }
379 395
380 396 pub(super) fn cached_directory_mtime(
381 397 &self,
382 398 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
383 399 // For now we do not have code to handle the absence of
384 400 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
385 401 // unset.
386 402 if self.flags().contains(Flags::DIRECTORY)
387 403 && self.flags().contains(Flags::HAS_MTIME)
388 404 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
389 405 {
390 406 Ok(Some(self.mtime()?))
391 407 } else {
392 408 Ok(None)
393 409 }
394 410 }
395 411
396 412 fn synthesize_unix_mode(&self) -> u32 {
397 413 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
398 414 libc::S_IFLNK
399 415 } else {
400 416 libc::S_IFREG
401 417 };
402 418 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
403 419 0o755
404 420 } else {
405 421 0o644
406 422 };
407 423 (file_type | permisions).into()
408 424 }
409 425
410 426 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
411 427 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
412 428 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
413 429 m.second_ambiguous = true;
414 430 }
415 431 Ok(m)
416 432 }
417 433
418 434 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
419 435 // TODO: convert through raw bits instead?
420 436 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
421 437 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
422 438 let p2_info = self.flags().contains(Flags::P2_INFO);
423 439 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
424 440 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
425 441 {
426 442 Some((self.synthesize_unix_mode(), self.size.into()))
427 443 } else {
428 444 None
429 445 };
430 446 let mtime = if self.flags().contains(Flags::HAS_MTIME)
431 447 && !self.flags().contains(Flags::DIRECTORY)
432 448 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
433 449 {
434 450 Some(self.mtime()?)
435 451 } else {
436 452 None
437 453 };
438 454 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
439 455 {
440 456 Some(self.flags().contains(Flags::FALLBACK_EXEC))
441 457 } else {
442 458 None
443 459 };
444 460 let fallback_symlink =
445 461 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
446 462 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
447 463 } else {
448 464 None
449 465 };
450 466 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
451 467 wc_tracked,
452 468 p1_tracked,
453 469 p2_info,
454 470 mode_size,
455 471 mtime,
456 472 fallback_exec,
457 473 fallback_symlink,
458 474 }))
459 475 }
460 476
461 477 pub(super) fn entry(
462 478 &self,
463 479 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
464 480 if self.has_entry() {
465 481 Ok(Some(self.assume_entry()?))
466 482 } else {
467 483 Ok(None)
468 484 }
469 485 }
470 486
471 487 pub(super) fn children<'on_disk>(
472 488 &self,
473 489 on_disk: &'on_disk [u8],
474 490 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
475 491 read_nodes(on_disk, self.children)
476 492 }
477 493
478 494 pub(super) fn to_in_memory_node<'on_disk>(
479 495 &self,
480 496 on_disk: &'on_disk [u8],
481 497 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
482 498 Ok(dirstate_map::Node {
483 499 children: dirstate_map::ChildNodes::OnDisk(
484 500 self.children(on_disk)?,
485 501 ),
486 502 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
487 503 data: self.node_data()?,
488 504 descendants_with_entry_count: self
489 505 .descendants_with_entry_count
490 506 .get(),
491 507 tracked_descendants_count: self.tracked_descendants_count.get(),
492 508 })
493 509 }
494 510
495 511 fn from_dirstate_entry(
496 512 entry: &DirstateEntry,
497 513 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
498 514 let DirstateV2Data {
499 515 wc_tracked,
500 516 p1_tracked,
501 517 p2_info,
502 518 mode_size: mode_size_opt,
503 519 mtime: mtime_opt,
504 520 fallback_exec,
505 521 fallback_symlink,
506 522 } = entry.v2_data();
507 523 // TODO: convert through raw flag bits instead?
508 524 let mut flags = Flags::empty();
509 525 flags.set(Flags::WDIR_TRACKED, wc_tracked);
510 526 flags.set(Flags::P1_TRACKED, p1_tracked);
511 527 flags.set(Flags::P2_INFO, p2_info);
512 528 let size = if let Some((m, s)) = mode_size_opt {
513 529 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
514 530 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
515 531 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
516 532 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
517 533 flags.insert(Flags::HAS_MODE_AND_SIZE);
518 534 s.into()
519 535 } else {
520 536 0.into()
521 537 };
522 538 let mtime = if let Some(m) = mtime_opt {
523 539 flags.insert(Flags::HAS_MTIME);
524 540 if m.second_ambiguous {
525 541 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
526 542 };
527 543 m.into()
528 544 } else {
529 545 PackedTruncatedTimestamp::null()
530 546 };
531 547 if let Some(f_exec) = fallback_exec {
532 548 flags.insert(Flags::HAS_FALLBACK_EXEC);
533 549 if f_exec {
534 550 flags.insert(Flags::FALLBACK_EXEC);
535 551 }
536 552 }
537 553 if let Some(f_symlink) = fallback_symlink {
538 554 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
539 555 if f_symlink {
540 556 flags.insert(Flags::FALLBACK_SYMLINK);
541 557 }
542 558 }
543 559 (flags, size, mtime)
544 560 }
545 561 }
546 562
547 563 fn read_hg_path(
548 564 on_disk: &[u8],
549 565 slice: PathSlice,
550 566 ) -> Result<&HgPath, DirstateV2ParseError> {
551 567 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
552 568 }
553 569
554 570 fn read_nodes(
555 571 on_disk: &[u8],
556 572 slice: ChildNodes,
557 573 ) -> Result<&[Node], DirstateV2ParseError> {
558 574 read_slice(on_disk, slice.start, slice.len.get())
559 575 }
560 576
561 577 fn read_slice<T, Len>(
562 578 on_disk: &[u8],
563 579 start: Offset,
564 580 len: Len,
565 581 ) -> Result<&[T], DirstateV2ParseError>
566 582 where
567 583 T: BytesCast,
568 584 Len: TryInto<usize>,
569 585 {
570 586 // Either `usize::MAX` would result in "out of bounds" error since a single
571 587 // `&[u8]` cannot occupy the entire addess space.
572 588 let start = start.get().try_into().unwrap_or(std::usize::MAX);
573 589 let len = len.try_into().unwrap_or(std::usize::MAX);
574 on_disk
575 .get(start..)
576 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
590 let bytes = match on_disk.get(start..) {
591 Some(bytes) => bytes,
592 None => {
593 return Err(DirstateV2ParseError::new(
594 "not enough bytes from disk",
595 ))
596 }
597 };
598 T::slice_from_bytes(bytes, len)
599 .map_err(|e| {
600 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
601 })
577 602 .map(|(slice, _rest)| slice)
578 .ok_or_else(|| DirstateV2ParseError)
579 603 }
580 604
581 605 pub(crate) fn for_each_tracked_path<'on_disk>(
582 606 on_disk: &'on_disk [u8],
583 607 metadata: &[u8],
584 608 mut f: impl FnMut(&'on_disk HgPath),
585 609 ) -> Result<(), DirstateV2ParseError> {
586 let (meta, _) = TreeMetadata::from_bytes(metadata)
587 .map_err(|_| DirstateV2ParseError)?;
610 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
611 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
612 })?;
588 613 fn recur<'on_disk>(
589 614 on_disk: &'on_disk [u8],
590 615 nodes: ChildNodes,
591 616 f: &mut impl FnMut(&'on_disk HgPath),
592 617 ) -> Result<(), DirstateV2ParseError> {
593 618 for node in read_nodes(on_disk, nodes)? {
594 619 if let Some(entry) = node.entry()? {
595 620 if entry.tracked() {
596 621 f(node.full_path(on_disk)?)
597 622 }
598 623 }
599 624 recur(on_disk, node.children, f)?
600 625 }
601 626 Ok(())
602 627 }
603 628 recur(on_disk, meta.root_nodes, &mut f)
604 629 }
605 630
606 631 /// Returns new data and metadata, together with whether that data should be
607 632 /// appended to the existing data file whose content is at
608 633 /// `dirstate_map.on_disk` (true), instead of written to a new data file
609 634 /// (false), and the previous size of data on disk.
610 635 pub(super) fn write(
611 636 dirstate_map: &DirstateMap,
612 637 can_append: bool,
613 638 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
614 639 let append = can_append && dirstate_map.write_should_append();
615 640
616 641 // This ignores the space for paths, and for nodes without an entry.
617 642 // TODO: better estimate? Skip the `Vec` and write to a file directly?
618 643 let size_guess = std::mem::size_of::<Node>()
619 644 * dirstate_map.nodes_with_entry_count as usize;
620 645
621 646 let mut writer = Writer {
622 647 dirstate_map,
623 648 append,
624 649 out: Vec::with_capacity(size_guess),
625 650 };
626 651
627 652 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
628 653
629 654 let unreachable_bytes = if append {
630 655 dirstate_map.unreachable_bytes
631 656 } else {
632 657 0
633 658 };
634 659 let meta = TreeMetadata {
635 660 root_nodes,
636 661 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
637 662 nodes_with_copy_source_count: dirstate_map
638 663 .nodes_with_copy_source_count
639 664 .into(),
640 665 unreachable_bytes: unreachable_bytes.into(),
641 666 unused: [0; 4],
642 667 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
643 668 };
644 669 Ok((writer.out, meta, append, dirstate_map.old_data_size))
645 670 }
646 671
/// State used while serializing a `DirstateMap`, see `write`.
struct Writer<'dmap, 'on_disk> {
    /// The map being serialized
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    /// Whether `out` is meant to be appended after the existing
    /// `dirstate_map.on_disk` data, which shifts every new offset by the
    /// length of that data
    append: bool,
    /// The bytes serialized so far
    out: Vec<u8>,
}
652 677
653 678 impl Writer<'_, '_> {
654 679 fn write_nodes(
655 680 &mut self,
656 681 nodes: dirstate_map::ChildNodesRef,
657 682 ) -> Result<ChildNodes, DirstateError> {
658 683 // Reuse already-written nodes if possible
659 684 if self.append {
660 685 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
661 686 let start = self.on_disk_offset_of(nodes_slice).expect(
662 687 "dirstate-v2 OnDisk nodes not found within on_disk",
663 688 );
664 689 let len = child_nodes_len_from_usize(nodes_slice.len());
665 690 return Ok(ChildNodes { start, len });
666 691 }
667 692 }
668 693
669 694 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
670 695 // undefined iteration order. Sort to enable binary search in the
671 696 // written file.
672 697 let nodes = nodes.sorted();
673 698 let nodes_len = nodes.len();
674 699
675 700 // First accumulate serialized nodes in a `Vec`
676 701 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
677 702 for node in nodes {
678 703 let children =
679 704 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
680 705 let full_path = node.full_path(self.dirstate_map.on_disk)?;
681 706 let full_path = self.write_path(full_path.as_bytes());
682 707 let copy_source = if let Some(source) =
683 708 node.copy_source(self.dirstate_map.on_disk)?
684 709 {
685 710 self.write_path(source.as_bytes())
686 711 } else {
687 712 PathSlice {
688 713 start: 0.into(),
689 714 len: 0.into(),
690 715 }
691 716 };
692 717 on_disk_nodes.push(match node {
693 718 NodeRef::InMemory(path, node) => {
694 719 let (flags, size, mtime) = match &node.data {
695 720 dirstate_map::NodeData::Entry(entry) => {
696 721 Node::from_dirstate_entry(entry)
697 722 }
698 723 dirstate_map::NodeData::CachedDirectory { mtime } => {
699 724 // we currently never set a mtime if unknown file
700 725 // are present.
701 726 // So if we have a mtime for a directory, we know
702 727 // they are no unknown
703 728 // files and we
704 729 // blindly set ALL_UNKNOWN_RECORDED.
705 730 //
706 731 // We never set ALL_IGNORED_RECORDED since we
707 732 // don't track that case
708 733 // currently.
709 734 let mut flags = Flags::DIRECTORY
710 735 | Flags::HAS_MTIME
711 736 | Flags::ALL_UNKNOWN_RECORDED;
712 737 if mtime.second_ambiguous {
713 738 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
714 739 }
715 740 (flags, 0.into(), (*mtime).into())
716 741 }
717 742 dirstate_map::NodeData::None => (
718 743 Flags::DIRECTORY,
719 744 0.into(),
720 745 PackedTruncatedTimestamp::null(),
721 746 ),
722 747 };
723 748 Node {
724 749 children,
725 750 copy_source,
726 751 full_path,
727 752 base_name_start: u16::try_from(path.base_name_start())
728 753 // Could only panic for paths over 64 KiB
729 754 .expect("dirstate-v2 path length overflow")
730 755 .into(),
731 756 descendants_with_entry_count: node
732 757 .descendants_with_entry_count
733 758 .into(),
734 759 tracked_descendants_count: node
735 760 .tracked_descendants_count
736 761 .into(),
737 762 flags: flags.bits().into(),
738 763 size,
739 764 mtime,
740 765 }
741 766 }
742 767 NodeRef::OnDisk(node) => Node {
743 768 children,
744 769 copy_source,
745 770 full_path,
746 771 ..*node
747 772 },
748 773 })
749 774 }
750 775 // … so we can write them contiguously, after writing everything else
751 776 // they refer to.
752 777 let start = self.current_offset();
753 778 let len = child_nodes_len_from_usize(nodes_len);
754 779 self.out.extend(on_disk_nodes.as_bytes());
755 780 Ok(ChildNodes { start, len })
756 781 }
757 782
758 783 /// If the given slice of items is within `on_disk`, returns its offset
759 784 /// from the start of `on_disk`.
760 785 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
761 786 where
762 787 T: BytesCast,
763 788 {
764 789 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
765 790 let start = slice.as_ptr() as usize;
766 791 let end = start + slice.len();
767 792 start..=end
768 793 }
769 794 let slice_addresses = address_range(slice.as_bytes());
770 795 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
771 796 if on_disk_addresses.contains(slice_addresses.start())
772 797 && on_disk_addresses.contains(slice_addresses.end())
773 798 {
774 799 let offset = slice_addresses.start() - on_disk_addresses.start();
775 800 Some(offset_from_usize(offset))
776 801 } else {
777 802 None
778 803 }
779 804 }
780 805
781 806 fn current_offset(&mut self) -> Offset {
782 807 let mut offset = self.out.len();
783 808 if self.append {
784 809 offset += self.dirstate_map.on_disk.len()
785 810 }
786 811 offset_from_usize(offset)
787 812 }
788 813
789 814 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
790 815 let len = path_len_from_usize(slice.len());
791 816 // Reuse an already-written path if possible
792 817 if self.append {
793 818 if let Some(start) = self.on_disk_offset_of(slice) {
794 819 return PathSlice { start, len };
795 820 }
796 821 }
797 822 let start = self.current_offset();
798 823 self.out.extend(slice.as_bytes());
799 824 PathSlice { start, len }
800 825 }
801 826 }
802 827
803 828 fn offset_from_usize(x: usize) -> Offset {
804 829 u32::try_from(x)
805 830 // Could only panic for a dirstate file larger than 4 GiB
806 831 .expect("dirstate-v2 offset overflow")
807 832 .into()
808 833 }
809 834
810 835 fn child_nodes_len_from_usize(x: usize) -> Size {
811 836 u32::try_from(x)
812 837 // Could only panic with over 4 billion nodes
813 838 .expect("dirstate-v2 slice length overflow")
814 839 .into()
815 840 }
816 841
817 842 fn path_len_from_usize(x: usize) -> PathSize {
818 843 u16::try_from(x)
819 844 // Could only panic for paths over 64 KiB
820 845 .expect("dirstate-v2 path length overflow")
821 846 .into()
822 847 }
823 848
824 849 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
825 850 fn from(timestamp: TruncatedTimestamp) -> Self {
826 851 Self {
827 852 truncated_seconds: timestamp.truncated_seconds().into(),
828 853 nanoseconds: timestamp.nanoseconds().into(),
829 854 }
830 855 }
831 856 }
832 857
833 858 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
834 859 type Error = DirstateV2ParseError;
835 860
836 861 fn try_from(
837 862 timestamp: PackedTruncatedTimestamp,
838 863 ) -> Result<Self, Self::Error> {
839 864 Self::from_already_truncated(
840 865 timestamp.truncated_seconds.get(),
841 866 timestamp.nanoseconds.get(),
842 867 false,
843 868 )
844 869 }
845 870 }
846 871 impl PackedTruncatedTimestamp {
847 872 fn null() -> Self {
848 873 Self {
849 874 truncated_seconds: 0.into(),
850 875 nanoseconds: 0.into(),
851 876 }
852 877 }
853 878 }
General Comments 0
You need to be logged in to leave comments. Login now