##// END OF EJS Templates
rust-dirstate: introduce intermediate struct for dirstate-v2 data...
Raphaël Gomès -
r49991:38e5bb14 default
parent child Browse files
Show More
@@ -1,711 +1,716 b''
1 1 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
2 2 use crate::errors::HgError;
3 3 use bitflags::bitflags;
4 4 use std::convert::{TryFrom, TryInto};
5 5 use std::fs;
6 6 use std::io;
7 7 use std::time::{SystemTime, UNIX_EPOCH};
8 8
9 9 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
10 10 pub enum EntryState {
11 11 Normal,
12 12 Added,
13 13 Removed,
14 14 Merged,
15 15 }
16 16
17 17 /// `size` and `mtime.seconds` are truncated to 31 bits.
18 18 ///
19 19 /// TODO: double-check status algorithm correctness for files
20 20 /// larger than 2 GiB or modified after 2038.
21 21 #[derive(Debug, Copy, Clone)]
22 22 pub struct DirstateEntry {
23 23 pub(crate) flags: Flags,
24 24 mode_size: Option<(u32, u32)>,
25 25 mtime: Option<TruncatedTimestamp>,
26 26 }
27 27
28 28 bitflags! {
29 29 pub(crate) struct Flags: u8 {
30 30 const WDIR_TRACKED = 1 << 0;
31 31 const P1_TRACKED = 1 << 1;
32 32 const P2_INFO = 1 << 2;
33 33 const HAS_FALLBACK_EXEC = 1 << 3;
34 34 const FALLBACK_EXEC = 1 << 4;
35 35 const HAS_FALLBACK_SYMLINK = 1 << 5;
36 36 const FALLBACK_SYMLINK = 1 << 6;
37 37 }
38 38 }
39 39
40 40 /// A Unix timestamp with nanoseconds precision
41 41 #[derive(Debug, Copy, Clone)]
42 42 pub struct TruncatedTimestamp {
43 43 truncated_seconds: u32,
44 44 /// Always in the `0 .. 1_000_000_000` range.
45 45 nanoseconds: u32,
46 46 /// TODO this should be in DirstateEntry, but the current code needs
47 47 /// refactoring to use DirstateEntry instead of TruncatedTimestamp for
48 48 /// comparison.
49 49 pub second_ambiguous: bool,
50 50 }
51 51
52 52 impl TruncatedTimestamp {
53 53 /// Constructs from a timestamp potentially outside of the supported range,
54 54 /// and truncate the seconds components to its lower 31 bits.
55 55 ///
56 56 /// Panics if the nanoseconds components is not in the expected range.
57 57 pub fn new_truncate(
58 58 seconds: i64,
59 59 nanoseconds: u32,
60 60 second_ambiguous: bool,
61 61 ) -> Self {
62 62 assert!(nanoseconds < NSEC_PER_SEC);
63 63 Self {
64 64 truncated_seconds: seconds as u32 & RANGE_MASK_31BIT,
65 65 nanoseconds,
66 66 second_ambiguous,
67 67 }
68 68 }
69 69
70 70 /// Construct from components. Returns an error if they are not in the
71 71 /// expcted range.
72 72 pub fn from_already_truncated(
73 73 truncated_seconds: u32,
74 74 nanoseconds: u32,
75 75 second_ambiguous: bool,
76 76 ) -> Result<Self, DirstateV2ParseError> {
77 77 if truncated_seconds & !RANGE_MASK_31BIT == 0
78 78 && nanoseconds < NSEC_PER_SEC
79 79 {
80 80 Ok(Self {
81 81 truncated_seconds,
82 82 nanoseconds,
83 83 second_ambiguous,
84 84 })
85 85 } else {
86 86 Err(DirstateV2ParseError)
87 87 }
88 88 }
89 89
90 90 /// Returns a `TruncatedTimestamp` for the modification time of `metadata`.
91 91 ///
92 92 /// Propagates errors from `std` on platforms where modification time
93 93 /// is not available at all.
94 94 pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> {
95 95 #[cfg(unix)]
96 96 {
97 97 use std::os::unix::fs::MetadataExt;
98 98 let seconds = metadata.mtime();
99 99 // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC` range
100 100 let nanoseconds = metadata.mtime_nsec().try_into().unwrap();
101 101 Ok(Self::new_truncate(seconds, nanoseconds, false))
102 102 }
103 103 #[cfg(not(unix))]
104 104 {
105 105 metadata.modified().map(Self::from)
106 106 }
107 107 }
108 108
109 109 /// Like `for_mtime_of`, but may return `None` or a value with
110 110 /// `second_ambiguous` set if the mtime is not "reliable".
111 111 ///
112 112 /// A modification time is reliable if it is older than `boundary` (or
113 113 /// sufficiently in the future).
114 114 ///
115 115 /// Otherwise a concurrent modification might happens with the same mtime.
116 116 pub fn for_reliable_mtime_of(
117 117 metadata: &fs::Metadata,
118 118 boundary: &Self,
119 119 ) -> io::Result<Option<Self>> {
120 120 let mut mtime = Self::for_mtime_of(metadata)?;
121 121 // If the mtime of the ambiguous file is younger (or equal) to the
122 122 // starting point of the `status` walk, we cannot garantee that
123 123 // another, racy, write will not happen right after with the same mtime
124 124 // and we cannot cache the information.
125 125 //
126 126 // However if the mtime is far away in the future, this is likely some
127 127 // mismatch between the current clock and previous file system
128 128 // operation. So mtime more than one days in the future are considered
129 129 // fine.
130 130 let reliable = if mtime.truncated_seconds == boundary.truncated_seconds
131 131 {
132 132 mtime.second_ambiguous = true;
133 133 mtime.nanoseconds != 0
134 134 && boundary.nanoseconds != 0
135 135 && mtime.nanoseconds < boundary.nanoseconds
136 136 } else {
137 137 // `truncated_seconds` is less than 2**31,
138 138 // so this does not overflow `u32`:
139 139 let one_day_later = boundary.truncated_seconds + 24 * 3600;
140 140 mtime.truncated_seconds < boundary.truncated_seconds
141 141 || mtime.truncated_seconds > one_day_later
142 142 };
143 143 if reliable {
144 144 Ok(Some(mtime))
145 145 } else {
146 146 Ok(None)
147 147 }
148 148 }
149 149
150 150 /// The lower 31 bits of the number of seconds since the epoch.
151 151 pub fn truncated_seconds(&self) -> u32 {
152 152 self.truncated_seconds
153 153 }
154 154
155 155 /// The sub-second component of this timestamp, in nanoseconds.
156 156 /// Always in the `0 .. 1_000_000_000` range.
157 157 ///
158 158 /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
159 159 pub fn nanoseconds(&self) -> u32 {
160 160 self.nanoseconds
161 161 }
162 162
163 163 /// Returns whether two timestamps are equal modulo 2**31 seconds.
164 164 ///
165 165 /// If this returns `true`, the original values converted from `SystemTime`
166 166 /// or given to `new_truncate` were very likely equal. A false positive is
167 167 /// possible if they were exactly a multiple of 2**31 seconds apart (around
168 168 /// 68 years). This is deemed very unlikely to happen by chance, especially
169 169 /// on filesystems that support sub-second precision.
170 170 ///
171 171 /// If someone is manipulating the modification times of some files to
172 172 /// intentionally make `hg status` return incorrect results, not truncating
173 173 /// wouldn’t help much since they can set exactly the expected timestamp.
174 174 ///
175 175 /// Sub-second precision is ignored if it is zero in either value.
176 176 /// Some APIs simply return zero when more precision is not available.
177 177 /// When comparing values from different sources, if only one is truncated
178 178 /// in that way, doing a simple comparison would cause many false
179 179 /// negatives.
180 180 pub fn likely_equal(self, other: Self) -> bool {
181 181 if self.truncated_seconds != other.truncated_seconds {
182 182 false
183 183 } else if self.nanoseconds == 0 || other.nanoseconds == 0 {
184 184 if self.second_ambiguous {
185 185 false
186 186 } else {
187 187 true
188 188 }
189 189 } else {
190 190 self.nanoseconds == other.nanoseconds
191 191 }
192 192 }
193 193
194 194 pub fn likely_equal_to_mtime_of(
195 195 self,
196 196 metadata: &fs::Metadata,
197 197 ) -> io::Result<bool> {
198 198 Ok(self.likely_equal(Self::for_mtime_of(metadata)?))
199 199 }
200 200 }
201 201
202 202 impl From<SystemTime> for TruncatedTimestamp {
203 203 fn from(system_time: SystemTime) -> Self {
204 204 // On Unix, `SystemTime` is a wrapper for the `timespec` C struct:
205 205 // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec
206 206 // We want to effectively access its fields, but the Rust standard
207 207 // library does not expose them. The best we can do is:
208 208 let seconds;
209 209 let nanoseconds;
210 210 match system_time.duration_since(UNIX_EPOCH) {
211 211 Ok(duration) => {
212 212 seconds = duration.as_secs() as i64;
213 213 nanoseconds = duration.subsec_nanos();
214 214 }
215 215 Err(error) => {
216 216 // `system_time` is before `UNIX_EPOCH`.
217 217 // We need to undo this algorithm:
218 218 // https://github.com/rust-lang/rust/blob/6bed1f0bc3cc50c10aab26d5f94b16a00776b8a5/library/std/src/sys/unix/time.rs#L40-L41
219 219 let negative = error.duration();
220 220 let negative_secs = negative.as_secs() as i64;
221 221 let negative_nanos = negative.subsec_nanos();
222 222 if negative_nanos == 0 {
223 223 seconds = -negative_secs;
224 224 nanoseconds = 0;
225 225 } else {
226 226 // For example if `system_time` was 4.3 seconds before
227 227 // the Unix epoch we get a Duration that represents
228 228 // `(-4, -0.3)` but we want `(-5, +0.7)`:
229 229 seconds = -1 - negative_secs;
230 230 nanoseconds = NSEC_PER_SEC - negative_nanos;
231 231 }
232 232 }
233 233 };
234 234 Self::new_truncate(seconds, nanoseconds, false)
235 235 }
236 236 }
237 237
238 238 const NSEC_PER_SEC: u32 = 1_000_000_000;
239 239 pub const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF;
240 240
241 241 pub const MTIME_UNSET: i32 = -1;
242 242
243 243 /// A `DirstateEntry` with a size of `-2` means that it was merged from the
244 244 /// other parent. This allows revert to pick the right status back during a
245 245 /// merge.
246 246 pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
247 247 /// A special value used for internal representation of special case in
248 248 /// dirstate v1 format.
249 249 pub const SIZE_NON_NORMAL: i32 = -1;
250 250
251 #[derive(Debug, Default, Copy, Clone)]
252 pub struct DirstateV2Data {
253 pub wc_tracked: bool,
254 pub p1_tracked: bool,
255 pub p2_info: bool,
256 pub mode_size: Option<(u32, u32)>,
257 pub mtime: Option<TruncatedTimestamp>,
258 pub fallback_exec: Option<bool>,
259 pub fallback_symlink: Option<bool>,
260 }
261
251 262 impl DirstateEntry {
252 pub fn from_v2_data(
253 wdir_tracked: bool,
254 p1_tracked: bool,
255 p2_info: bool,
256 mode_size: Option<(u32, u32)>,
257 mtime: Option<TruncatedTimestamp>,
258 fallback_exec: Option<bool>,
259 fallback_symlink: Option<bool>,
260 ) -> Self {
263 pub fn from_v2_data(v2_data: DirstateV2Data) -> Self {
264 let DirstateV2Data {
265 wc_tracked,
266 p1_tracked,
267 p2_info,
268 mode_size,
269 mtime,
270 fallback_exec,
271 fallback_symlink,
272 } = v2_data;
261 273 if let Some((mode, size)) = mode_size {
262 274 // TODO: return an error for out of range values?
263 275 assert!(mode & !RANGE_MASK_31BIT == 0);
264 276 assert!(size & !RANGE_MASK_31BIT == 0);
265 277 }
266 278 let mut flags = Flags::empty();
267 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
279 flags.set(Flags::WDIR_TRACKED, wc_tracked);
268 280 flags.set(Flags::P1_TRACKED, p1_tracked);
269 281 flags.set(Flags::P2_INFO, p2_info);
270 282 if let Some(exec) = fallback_exec {
271 283 flags.insert(Flags::HAS_FALLBACK_EXEC);
272 284 if exec {
273 285 flags.insert(Flags::FALLBACK_EXEC);
274 286 }
275 287 }
276 288 if let Some(exec) = fallback_symlink {
277 289 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
278 290 if exec {
279 291 flags.insert(Flags::FALLBACK_SYMLINK);
280 292 }
281 293 }
282 294 Self {
283 295 flags,
284 296 mode_size,
285 297 mtime,
286 298 }
287 299 }
288 300
289 301 pub fn from_v1_data(
290 302 state: EntryState,
291 303 mode: i32,
292 304 size: i32,
293 305 mtime: i32,
294 306 ) -> Self {
295 307 match state {
296 308 EntryState::Normal => {
297 309 if size == SIZE_FROM_OTHER_PARENT {
298 310 Self {
299 311 // might be missing P1_TRACKED
300 312 flags: Flags::WDIR_TRACKED | Flags::P2_INFO,
301 313 mode_size: None,
302 314 mtime: None,
303 315 }
304 316 } else if size == SIZE_NON_NORMAL {
305 317 Self {
306 318 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
307 319 mode_size: None,
308 320 mtime: None,
309 321 }
310 322 } else if mtime == MTIME_UNSET {
311 323 // TODO: return an error for negative values?
312 324 let mode = u32::try_from(mode).unwrap();
313 325 let size = u32::try_from(size).unwrap();
314 326 Self {
315 327 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
316 328 mode_size: Some((mode, size)),
317 329 mtime: None,
318 330 }
319 331 } else {
320 332 // TODO: return an error for negative values?
321 333 let mode = u32::try_from(mode).unwrap();
322 334 let size = u32::try_from(size).unwrap();
323 335 let mtime = u32::try_from(mtime).unwrap();
324 336 let mtime = TruncatedTimestamp::from_already_truncated(
325 337 mtime, 0, false,
326 338 )
327 339 .unwrap();
328 340 Self {
329 341 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
330 342 mode_size: Some((mode, size)),
331 343 mtime: Some(mtime),
332 344 }
333 345 }
334 346 }
335 347 EntryState::Added => Self {
336 348 flags: Flags::WDIR_TRACKED,
337 349 mode_size: None,
338 350 mtime: None,
339 351 },
340 352 EntryState::Removed => Self {
341 353 flags: if size == SIZE_NON_NORMAL {
342 354 Flags::P1_TRACKED | Flags::P2_INFO
343 355 } else if size == SIZE_FROM_OTHER_PARENT {
344 356 // We don’t know if P1_TRACKED should be set (file history)
345 357 Flags::P2_INFO
346 358 } else {
347 359 Flags::P1_TRACKED
348 360 },
349 361 mode_size: None,
350 362 mtime: None,
351 363 },
352 364 EntryState::Merged => Self {
353 365 flags: Flags::WDIR_TRACKED
354 366 | Flags::P1_TRACKED // might not be true because of rename ?
355 367 | Flags::P2_INFO, // might not be true because of rename ?
356 368 mode_size: None,
357 369 mtime: None,
358 370 },
359 371 }
360 372 }
361 373
362 374 /// Creates a new entry in "removed" state.
363 375 ///
364 376 /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or
365 377 /// `SIZE_FROM_OTHER_PARENT`
366 378 pub fn new_removed(size: i32) -> Self {
367 379 Self::from_v1_data(EntryState::Removed, 0, size, 0)
368 380 }
369 381
370 382 pub fn new_tracked() -> Self {
371 Self::from_v2_data(true, false, false, None, None, None, None)
383 let data = DirstateV2Data {
384 wc_tracked: true,
385 ..Default::default()
386 };
387 Self::from_v2_data(data)
372 388 }
373 389
374 390 pub fn tracked(&self) -> bool {
375 391 self.flags.contains(Flags::WDIR_TRACKED)
376 392 }
377 393
378 394 pub fn p1_tracked(&self) -> bool {
379 395 self.flags.contains(Flags::P1_TRACKED)
380 396 }
381 397
382 398 fn in_either_parent(&self) -> bool {
383 399 self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO)
384 400 }
385 401
386 402 pub fn removed(&self) -> bool {
387 403 self.in_either_parent() && !self.flags.contains(Flags::WDIR_TRACKED)
388 404 }
389 405
390 406 pub fn p2_info(&self) -> bool {
391 407 self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
392 408 }
393 409
394 410 pub fn added(&self) -> bool {
395 411 self.flags.contains(Flags::WDIR_TRACKED) && !self.in_either_parent()
396 412 }
397 413
398 414 pub fn maybe_clean(&self) -> bool {
399 415 if !self.flags.contains(Flags::WDIR_TRACKED) {
400 416 false
401 417 } else if !self.flags.contains(Flags::P1_TRACKED) {
402 418 false
403 419 } else if self.flags.contains(Flags::P2_INFO) {
404 420 false
405 421 } else {
406 422 true
407 423 }
408 424 }
409 425
410 426 pub fn any_tracked(&self) -> bool {
411 427 self.flags.intersects(
412 428 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
413 429 )
414 430 }
415 431
416 /// Returns `(wdir_tracked, p1_tracked, p2_info, mode_size, mtime)`
417 pub(crate) fn v2_data(
418 &self,
419 ) -> (
420 bool,
421 bool,
422 bool,
423 Option<(u32, u32)>,
424 Option<TruncatedTimestamp>,
425 Option<bool>,
426 Option<bool>,
427 ) {
432 pub(crate) fn v2_data(&self) -> DirstateV2Data {
428 433 if !self.any_tracked() {
429 434 // TODO: return an Option instead?
430 435 panic!("Accessing v2_data of an untracked DirstateEntry")
431 436 }
432 let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
437 let wc_tracked = self.flags.contains(Flags::WDIR_TRACKED);
433 438 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
434 439 let p2_info = self.flags.contains(Flags::P2_INFO);
435 440 let mode_size = self.mode_size;
436 441 let mtime = self.mtime;
437 (
438 wdir_tracked,
442 DirstateV2Data {
443 wc_tracked,
439 444 p1_tracked,
440 445 p2_info,
441 446 mode_size,
442 447 mtime,
443 self.get_fallback_exec(),
444 self.get_fallback_symlink(),
445 )
448 fallback_exec: self.get_fallback_exec(),
449 fallback_symlink: self.get_fallback_symlink(),
450 }
446 451 }
447 452
448 453 fn v1_state(&self) -> EntryState {
449 454 if !self.any_tracked() {
450 455 // TODO: return an Option instead?
451 456 panic!("Accessing v1_state of an untracked DirstateEntry")
452 457 }
453 458 if self.removed() {
454 459 EntryState::Removed
455 460 } else if self
456 461 .flags
457 462 .contains(Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO)
458 463 {
459 464 EntryState::Merged
460 465 } else if self.added() {
461 466 EntryState::Added
462 467 } else {
463 468 EntryState::Normal
464 469 }
465 470 }
466 471
467 472 fn v1_mode(&self) -> i32 {
468 473 if let Some((mode, _size)) = self.mode_size {
469 474 i32::try_from(mode).unwrap()
470 475 } else {
471 476 0
472 477 }
473 478 }
474 479
475 480 fn v1_size(&self) -> i32 {
476 481 if !self.any_tracked() {
477 482 // TODO: return an Option instead?
478 483 panic!("Accessing v1_size of an untracked DirstateEntry")
479 484 }
480 485 if self.removed()
481 486 && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO)
482 487 {
483 488 SIZE_NON_NORMAL
484 489 } else if self.flags.contains(Flags::P2_INFO) {
485 490 SIZE_FROM_OTHER_PARENT
486 491 } else if self.removed() {
487 492 0
488 493 } else if self.added() {
489 494 SIZE_NON_NORMAL
490 495 } else if let Some((_mode, size)) = self.mode_size {
491 496 i32::try_from(size).unwrap()
492 497 } else {
493 498 SIZE_NON_NORMAL
494 499 }
495 500 }
496 501
497 502 fn v1_mtime(&self) -> i32 {
498 503 if !self.any_tracked() {
499 504 // TODO: return an Option instead?
500 505 panic!("Accessing v1_mtime of an untracked DirstateEntry")
501 506 }
502 507 if self.removed() {
503 508 0
504 509 } else if self.flags.contains(Flags::P2_INFO) {
505 510 MTIME_UNSET
506 511 } else if !self.flags.contains(Flags::P1_TRACKED) {
507 512 MTIME_UNSET
508 513 } else if let Some(mtime) = self.mtime {
509 514 if mtime.second_ambiguous {
510 515 MTIME_UNSET
511 516 } else {
512 517 i32::try_from(mtime.truncated_seconds()).unwrap()
513 518 }
514 519 } else {
515 520 MTIME_UNSET
516 521 }
517 522 }
518 523
519 524 // TODO: return `Option<EntryState>`? None when `!self.any_tracked`
520 525 pub fn state(&self) -> EntryState {
521 526 self.v1_state()
522 527 }
523 528
524 529 // TODO: return Option?
525 530 pub fn mode(&self) -> i32 {
526 531 self.v1_mode()
527 532 }
528 533
529 534 // TODO: return Option?
530 535 pub fn size(&self) -> i32 {
531 536 self.v1_size()
532 537 }
533 538
534 539 // TODO: return Option?
535 540 pub fn mtime(&self) -> i32 {
536 541 self.v1_mtime()
537 542 }
538 543
539 544 pub fn get_fallback_exec(&self) -> Option<bool> {
540 545 if self.flags.contains(Flags::HAS_FALLBACK_EXEC) {
541 546 Some(self.flags.contains(Flags::FALLBACK_EXEC))
542 547 } else {
543 548 None
544 549 }
545 550 }
546 551
547 552 pub fn set_fallback_exec(&mut self, value: Option<bool>) {
548 553 match value {
549 554 None => {
550 555 self.flags.remove(Flags::HAS_FALLBACK_EXEC);
551 556 self.flags.remove(Flags::FALLBACK_EXEC);
552 557 }
553 558 Some(exec) => {
554 559 self.flags.insert(Flags::HAS_FALLBACK_EXEC);
555 560 if exec {
556 561 self.flags.insert(Flags::FALLBACK_EXEC);
557 562 }
558 563 }
559 564 }
560 565 }
561 566
562 567 pub fn get_fallback_symlink(&self) -> Option<bool> {
563 568 if self.flags.contains(Flags::HAS_FALLBACK_SYMLINK) {
564 569 Some(self.flags.contains(Flags::FALLBACK_SYMLINK))
565 570 } else {
566 571 None
567 572 }
568 573 }
569 574
570 575 pub fn set_fallback_symlink(&mut self, value: Option<bool>) {
571 576 match value {
572 577 None => {
573 578 self.flags.remove(Flags::HAS_FALLBACK_SYMLINK);
574 579 self.flags.remove(Flags::FALLBACK_SYMLINK);
575 580 }
576 581 Some(symlink) => {
577 582 self.flags.insert(Flags::HAS_FALLBACK_SYMLINK);
578 583 if symlink {
579 584 self.flags.insert(Flags::FALLBACK_SYMLINK);
580 585 }
581 586 }
582 587 }
583 588 }
584 589
585 590 pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> {
586 591 self.mtime
587 592 }
588 593
589 594 pub fn drop_merge_data(&mut self) {
590 595 if self.flags.contains(Flags::P2_INFO) {
591 596 self.flags.remove(Flags::P2_INFO);
592 597 self.mode_size = None;
593 598 self.mtime = None;
594 599 }
595 600 }
596 601
597 602 pub fn set_possibly_dirty(&mut self) {
598 603 self.mtime = None
599 604 }
600 605
601 606 pub fn set_clean(
602 607 &mut self,
603 608 mode: u32,
604 609 size: u32,
605 610 mtime: TruncatedTimestamp,
606 611 ) {
607 612 let size = size & RANGE_MASK_31BIT;
608 613 self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
609 614 self.mode_size = Some((mode, size));
610 615 self.mtime = Some(mtime);
611 616 }
612 617
613 618 pub fn set_tracked(&mut self) {
614 619 self.flags.insert(Flags::WDIR_TRACKED);
615 620 // `set_tracked` is replacing various `normallookup` call. So we mark
616 621 // the files as needing lookup
617 622 //
618 623 // Consider dropping this in the future in favor of something less
619 624 // broad.
620 625 self.mtime = None;
621 626 }
622 627
623 628 pub fn set_untracked(&mut self) {
624 629 self.flags.remove(Flags::WDIR_TRACKED);
625 630 self.mode_size = None;
626 631 self.mtime = None;
627 632 }
628 633
629 634 /// Returns `(state, mode, size, mtime)` for the purpose of serialization
630 635 /// in the dirstate-v1 format.
631 636 ///
632 637 /// This includes marker values such as `mtime == -1`. In the future we may
633 638 /// want to not represent these cases that way in memory, but serialization
634 639 /// will need to keep the same format.
635 640 pub fn v1_data(&self) -> (u8, i32, i32, i32) {
636 641 (
637 642 self.v1_state().into(),
638 643 self.v1_mode(),
639 644 self.v1_size(),
640 645 self.v1_mtime(),
641 646 )
642 647 }
643 648
644 649 pub(crate) fn is_from_other_parent(&self) -> bool {
645 650 self.state() == EntryState::Normal
646 651 && self.size() == SIZE_FROM_OTHER_PARENT
647 652 }
648 653
649 654 // TODO: other platforms
650 655 #[cfg(unix)]
651 656 pub fn mode_changed(
652 657 &self,
653 658 filesystem_metadata: &std::fs::Metadata,
654 659 ) -> bool {
655 660 let dirstate_exec_bit = (self.mode() as u32 & EXEC_BIT_MASK) != 0;
656 661 let fs_exec_bit = has_exec_bit(filesystem_metadata);
657 662 dirstate_exec_bit != fs_exec_bit
658 663 }
659 664
660 665 /// Returns a `(state, mode, size, mtime)` tuple as for
661 666 /// `DirstateMapMethods::debug_iter`.
662 667 pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
663 668 (self.state().into(), self.mode(), self.size(), self.mtime())
664 669 }
665 670 }
666 671
667 672 impl EntryState {
668 673 pub fn is_tracked(self) -> bool {
669 674 use EntryState::*;
670 675 match self {
671 676 Normal | Added | Merged => true,
672 677 Removed => false,
673 678 }
674 679 }
675 680 }
676 681
677 682 impl TryFrom<u8> for EntryState {
678 683 type Error = HgError;
679 684
680 685 fn try_from(value: u8) -> Result<Self, Self::Error> {
681 686 match value {
682 687 b'n' => Ok(EntryState::Normal),
683 688 b'a' => Ok(EntryState::Added),
684 689 b'r' => Ok(EntryState::Removed),
685 690 b'm' => Ok(EntryState::Merged),
686 691 _ => Err(HgError::CorruptedRepository(format!(
687 692 "Incorrect dirstate entry state {}",
688 693 value
689 694 ))),
690 695 }
691 696 }
692 697 }
693 698
694 699 impl Into<u8> for EntryState {
695 700 fn into(self) -> u8 {
696 701 match self {
697 702 EntryState::Normal => b'n',
698 703 EntryState::Added => b'a',
699 704 EntryState::Removed => b'r',
700 705 EntryState::Merged => b'm',
701 706 }
702 707 }
703 708 }
704 709
/// Owner "execute" permission bit of a Unix file mode.
const EXEC_BIT_MASK: u32 = 0o100;

/// Returns whether the owner-executable bit is set in the mode of
/// `metadata`.
pub fn has_exec_bit(metadata: &std::fs::Metadata) -> bool {
    // TODO: How to handle executable permissions on Windows?
    use std::os::unix::fs::MetadataExt;
    let mode_bits = metadata.mode();
    mode_bits & EXEC_BIT_MASK != 0
}
@@ -1,843 +1,843 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 use crate::dirstate::TruncatedTimestamp;
5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 8 use crate::errors::HgError;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::DirstateEntry;
11 11 use crate::DirstateError;
12 12 use crate::DirstateParents;
13 13 use bitflags::bitflags;
14 14 use bytes_cast::unaligned::{U16Be, U32Be};
15 15 use bytes_cast::BytesCast;
16 16 use format_bytes::format_bytes;
17 17 use rand::Rng;
18 18 use std::borrow::Cow;
19 19 use std::convert::{TryFrom, TryInto};
20 20 use std::fmt::Write;
21 21
22 22 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
23 23 /// This is a redundant sanity check more than an actual "magic number" since
24 24 /// `.hg/requires` already governs which format should be used.
25 25 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
26 26
27 27 /// Keep space for 256-bit hashes
28 28 const STORED_NODE_ID_BYTES: usize = 32;
29 29
30 30 /// … even though only 160 bits are used for now, with SHA-1
31 31 const USED_NODE_ID_BYTES: usize = 20;
32 32
33 33 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
34 34 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
35 35
36 36 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
37 37 const TREE_METADATA_SIZE: usize = 44;
38 38 const NODE_SIZE: usize = 44;
39 39
40 40 /// Make sure that size-affecting changes are made knowingly
41 41 #[allow(unused)]
42 42 fn static_assert_size_of() {
43 43 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
44 44 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
45 45 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
46 46 }
47 47
48 48 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
49 49 #[derive(BytesCast)]
50 50 #[repr(C)]
51 51 struct DocketHeader {
52 52 marker: [u8; V2_FORMAT_MARKER.len()],
53 53 parent_1: [u8; STORED_NODE_ID_BYTES],
54 54 parent_2: [u8; STORED_NODE_ID_BYTES],
55 55
56 56 metadata: TreeMetadata,
57 57
58 58 /// Counted in bytes
59 59 data_size: Size,
60 60
61 61 uuid_size: u8,
62 62 }
63 63
64 64 pub struct Docket<'on_disk> {
65 65 header: &'on_disk DocketHeader,
66 66 pub uuid: &'on_disk [u8],
67 67 }
68 68
69 69 /// Fields are documented in the *Tree metadata in the docket file*
70 70 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
71 71 #[derive(BytesCast)]
72 72 #[repr(C)]
73 73 pub struct TreeMetadata {
74 74 root_nodes: ChildNodes,
75 75 nodes_with_entry_count: Size,
76 76 nodes_with_copy_source_count: Size,
77 77 unreachable_bytes: Size,
78 78 unused: [u8; 4],
79 79
80 80 /// See *Optional hash of ignore patterns* section of
81 81 /// `mercurial/helptext/internals/dirstate-v2.txt`
82 82 ignore_patterns_hash: IgnorePatternsHash,
83 83 }
84 84
85 85 /// Fields are documented in the *The data file format*
86 86 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
87 87 #[derive(BytesCast)]
88 88 #[repr(C)]
89 89 pub(super) struct Node {
90 90 full_path: PathSlice,
91 91
92 92 /// In bytes from `self.full_path.start`
93 93 base_name_start: PathSize,
94 94
95 95 copy_source: OptPathSlice,
96 96 children: ChildNodes,
97 97 pub(super) descendants_with_entry_count: Size,
98 98 pub(super) tracked_descendants_count: Size,
99 99 flags: U16Be,
100 100 size: U32Be,
101 101 mtime: PackedTruncatedTimestamp,
102 102 }
103 103
104 104 bitflags! {
105 105 #[repr(C)]
106 106 struct Flags: u16 {
107 107 const WDIR_TRACKED = 1 << 0;
108 108 const P1_TRACKED = 1 << 1;
109 109 const P2_INFO = 1 << 2;
110 110 const MODE_EXEC_PERM = 1 << 3;
111 111 const MODE_IS_SYMLINK = 1 << 4;
112 112 const HAS_FALLBACK_EXEC = 1 << 5;
113 113 const FALLBACK_EXEC = 1 << 6;
114 114 const HAS_FALLBACK_SYMLINK = 1 << 7;
115 115 const FALLBACK_SYMLINK = 1 << 8;
116 116 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
117 117 const HAS_MODE_AND_SIZE = 1 <<10;
118 118 const HAS_MTIME = 1 <<11;
119 119 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
120 120 const DIRECTORY = 1 <<13;
121 121 const ALL_UNKNOWN_RECORDED = 1 <<14;
122 122 const ALL_IGNORED_RECORDED = 1 <<15;
123 123 }
124 124 }
125 125
126 126 /// Duration since the Unix epoch
127 127 #[derive(BytesCast, Copy, Clone)]
128 128 #[repr(C)]
129 129 struct PackedTruncatedTimestamp {
130 130 truncated_seconds: U32Be,
131 131 nanoseconds: U32Be,
132 132 }
133 133
134 134 /// Counted in bytes from the start of the file
135 135 ///
136 136 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
137 137 type Offset = U32Be;
138 138
139 139 /// Counted in number of items
140 140 ///
141 141 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
142 142 type Size = U32Be;
143 143
144 144 /// Counted in bytes
145 145 ///
146 146 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
147 147 type PathSize = U16Be;
148 148
149 149 /// A contiguous sequence of `len` times `Node`, representing the child nodes
150 150 /// of either some other node or of the repository root.
151 151 ///
152 152 /// Always sorted by ascending `full_path`, to allow binary search.
153 153 /// Since nodes with the same parent nodes also have the same parent path,
154 154 /// only the `base_name`s need to be compared during binary search.
155 155 #[derive(BytesCast, Copy, Clone)]
156 156 #[repr(C)]
157 157 struct ChildNodes {
158 158 start: Offset,
159 159 len: Size,
160 160 }
161 161
162 162 /// A `HgPath` of `len` bytes
163 163 #[derive(BytesCast, Copy, Clone)]
164 164 #[repr(C)]
165 165 struct PathSlice {
166 166 start: Offset,
167 167 len: PathSize,
168 168 }
169 169
170 170 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
171 171 type OptPathSlice = PathSlice;
172 172
173 173 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
174 174 ///
175 175 /// This should only happen if Mercurial is buggy or a repository is corrupted.
176 176 #[derive(Debug)]
177 177 pub struct DirstateV2ParseError;
178 178
179 179 impl From<DirstateV2ParseError> for HgError {
180 180 fn from(_: DirstateV2ParseError) -> Self {
181 181 HgError::corrupted("dirstate-v2 parse error")
182 182 }
183 183 }
184 184
185 185 impl From<DirstateV2ParseError> for crate::DirstateError {
186 186 fn from(error: DirstateV2ParseError) -> Self {
187 187 HgError::from(error).into()
188 188 }
189 189 }
190 190
191 191 impl TreeMetadata {
192 192 pub fn as_bytes(&self) -> &[u8] {
193 193 BytesCast::as_bytes(self)
194 194 }
195 195 }
196 196
197 197 impl<'on_disk> Docket<'on_disk> {
198 198 /// Generate the identifier for a new data file
199 199 ///
200 200 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
201 201 /// See `mercurial/revlogutils/docket.py`
202 202 pub fn new_uid() -> String {
203 203 const ID_LENGTH: usize = 8;
204 204 let mut id = String::with_capacity(ID_LENGTH);
205 205 let mut rng = rand::thread_rng();
206 206 for _ in 0..ID_LENGTH {
207 207 // One random hexadecimal digit.
208 208 // `unwrap` never panics because `impl Write for String`
209 209 // never returns an error.
210 210 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
211 211 }
212 212 id
213 213 }
214 214
215 215 pub fn serialize(
216 216 parents: DirstateParents,
217 217 tree_metadata: TreeMetadata,
218 218 data_size: u64,
219 219 uuid: &[u8],
220 220 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
221 221 let header = DocketHeader {
222 222 marker: *V2_FORMAT_MARKER,
223 223 parent_1: parents.p1.pad_to_256_bits(),
224 224 parent_2: parents.p2.pad_to_256_bits(),
225 225 metadata: tree_metadata,
226 226 data_size: u32::try_from(data_size)?.into(),
227 227 uuid_size: uuid.len().try_into()?,
228 228 };
229 229 let header = header.as_bytes();
230 230 let mut docket = Vec::with_capacity(header.len() + uuid.len());
231 231 docket.extend_from_slice(header);
232 232 docket.extend_from_slice(uuid);
233 233 Ok(docket)
234 234 }
235 235
236 236 pub fn parents(&self) -> DirstateParents {
237 237 use crate::Node;
238 238 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
239 239 .unwrap()
240 240 .clone();
241 241 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
242 242 .unwrap()
243 243 .clone();
244 244 DirstateParents { p1, p2 }
245 245 }
246 246
247 247 pub fn tree_metadata(&self) -> &[u8] {
248 248 self.header.metadata.as_bytes()
249 249 }
250 250
251 251 pub fn data_size(&self) -> usize {
252 252 // This `unwrap` could only panic on a 16-bit CPU
253 253 self.header.data_size.get().try_into().unwrap()
254 254 }
255 255
256 256 pub fn data_filename(&self) -> String {
257 257 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
258 258 }
259 259 }
260 260
261 261 pub fn read_docket(
262 262 on_disk: &[u8],
263 263 ) -> Result<Docket<'_>, DirstateV2ParseError> {
264 264 let (header, uuid) =
265 265 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
266 266 let uuid_size = header.uuid_size as usize;
267 267 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
268 268 Ok(Docket { header, uuid })
269 269 } else {
270 270 Err(DirstateV2ParseError)
271 271 }
272 272 }
273 273
274 274 pub(super) fn read<'on_disk>(
275 275 on_disk: &'on_disk [u8],
276 276 metadata: &[u8],
277 277 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
278 278 if on_disk.is_empty() {
279 279 return Ok(DirstateMap::empty(on_disk));
280 280 }
281 281 let (meta, _) = TreeMetadata::from_bytes(metadata)
282 282 .map_err(|_| DirstateV2ParseError)?;
283 283 let dirstate_map = DirstateMap {
284 284 on_disk,
285 285 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
286 286 on_disk,
287 287 meta.root_nodes,
288 288 )?),
289 289 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
290 290 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
291 291 ignore_patterns_hash: meta.ignore_patterns_hash,
292 292 unreachable_bytes: meta.unreachable_bytes.get(),
293 293 };
294 294 Ok(dirstate_map)
295 295 }
296 296
297 297 impl Node {
298 298 pub(super) fn full_path<'on_disk>(
299 299 &self,
300 300 on_disk: &'on_disk [u8],
301 301 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
302 302 read_hg_path(on_disk, self.full_path)
303 303 }
304 304
305 305 pub(super) fn base_name_start<'on_disk>(
306 306 &self,
307 307 ) -> Result<usize, DirstateV2ParseError> {
308 308 let start = self.base_name_start.get();
309 309 if start < self.full_path.len.get() {
310 310 let start = usize::try_from(start)
311 311 // u32 -> usize, could only panic on a 16-bit CPU
312 312 .expect("dirstate-v2 base_name_start out of bounds");
313 313 Ok(start)
314 314 } else {
315 315 Err(DirstateV2ParseError)
316 316 }
317 317 }
318 318
319 319 pub(super) fn base_name<'on_disk>(
320 320 &self,
321 321 on_disk: &'on_disk [u8],
322 322 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
323 323 let full_path = self.full_path(on_disk)?;
324 324 let base_name_start = self.base_name_start()?;
325 325 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
326 326 }
327 327
328 328 pub(super) fn path<'on_disk>(
329 329 &self,
330 330 on_disk: &'on_disk [u8],
331 331 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
332 332 Ok(WithBasename::from_raw_parts(
333 333 Cow::Borrowed(self.full_path(on_disk)?),
334 334 self.base_name_start()?,
335 335 ))
336 336 }
337 337
338 338 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
339 339 self.copy_source.start.get() != 0
340 340 }
341 341
342 342 pub(super) fn copy_source<'on_disk>(
343 343 &self,
344 344 on_disk: &'on_disk [u8],
345 345 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
346 346 Ok(if self.has_copy_source() {
347 347 Some(read_hg_path(on_disk, self.copy_source)?)
348 348 } else {
349 349 None
350 350 })
351 351 }
352 352
353 353 fn flags(&self) -> Flags {
354 354 Flags::from_bits_truncate(self.flags.get())
355 355 }
356 356
357 357 fn has_entry(&self) -> bool {
358 358 self.flags().intersects(
359 359 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
360 360 )
361 361 }
362 362
363 363 pub(super) fn node_data(
364 364 &self,
365 365 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
366 366 if self.has_entry() {
367 367 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
368 368 } else if let Some(mtime) = self.cached_directory_mtime()? {
369 369 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
370 370 } else {
371 371 Ok(dirstate_map::NodeData::None)
372 372 }
373 373 }
374 374
375 375 pub(super) fn cached_directory_mtime(
376 376 &self,
377 377 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
378 378 // For now we do not have code to handle the absence of
379 379 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
380 380 // unset.
381 381 if self.flags().contains(Flags::DIRECTORY)
382 382 && self.flags().contains(Flags::HAS_MTIME)
383 383 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
384 384 {
385 385 Ok(Some(self.mtime()?))
386 386 } else {
387 387 Ok(None)
388 388 }
389 389 }
390 390
391 391 fn synthesize_unix_mode(&self) -> u32 {
392 392 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
393 393 libc::S_IFLNK
394 394 } else {
395 395 libc::S_IFREG
396 396 };
397 397 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
398 398 0o755
399 399 } else {
400 400 0o644
401 401 };
402 402 (file_type | permisions).into()
403 403 }
404 404
405 405 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
406 406 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
407 407 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
408 408 m.second_ambiguous = true;
409 409 }
410 410 Ok(m)
411 411 }
412 412
413 413 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
414 414 // TODO: convert through raw bits instead?
415 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
415 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
416 416 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
417 417 let p2_info = self.flags().contains(Flags::P2_INFO);
418 418 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
419 419 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
420 420 {
421 421 Some((self.synthesize_unix_mode(), self.size.into()))
422 422 } else {
423 423 None
424 424 };
425 425 let mtime = if self.flags().contains(Flags::HAS_MTIME)
426 426 && !self.flags().contains(Flags::DIRECTORY)
427 427 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
428 428 {
429 429 Some(self.mtime()?)
430 430 } else {
431 431 None
432 432 };
433 433 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
434 434 {
435 435 Some(self.flags().contains(Flags::FALLBACK_EXEC))
436 436 } else {
437 437 None
438 438 };
439 439 let fallback_symlink =
440 440 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
441 441 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
442 442 } else {
443 443 None
444 444 };
445 Ok(DirstateEntry::from_v2_data(
446 wdir_tracked,
445 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
446 wc_tracked,
447 447 p1_tracked,
448 448 p2_info,
449 449 mode_size,
450 450 mtime,
451 451 fallback_exec,
452 452 fallback_symlink,
453 ))
453 }))
454 454 }
455 455
456 456 pub(super) fn entry(
457 457 &self,
458 458 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
459 459 if self.has_entry() {
460 460 Ok(Some(self.assume_entry()?))
461 461 } else {
462 462 Ok(None)
463 463 }
464 464 }
465 465
466 466 pub(super) fn children<'on_disk>(
467 467 &self,
468 468 on_disk: &'on_disk [u8],
469 469 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
470 470 read_nodes(on_disk, self.children)
471 471 }
472 472
473 473 pub(super) fn to_in_memory_node<'on_disk>(
474 474 &self,
475 475 on_disk: &'on_disk [u8],
476 476 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
477 477 Ok(dirstate_map::Node {
478 478 children: dirstate_map::ChildNodes::OnDisk(
479 479 self.children(on_disk)?,
480 480 ),
481 481 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
482 482 data: self.node_data()?,
483 483 descendants_with_entry_count: self
484 484 .descendants_with_entry_count
485 485 .get(),
486 486 tracked_descendants_count: self.tracked_descendants_count.get(),
487 487 })
488 488 }
489 489
490 490 fn from_dirstate_entry(
491 491 entry: &DirstateEntry,
492 492 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
493 let (
494 wdir_tracked,
493 let DirstateV2Data {
494 wc_tracked,
495 495 p1_tracked,
496 496 p2_info,
497 mode_size_opt,
498 mtime_opt,
497 mode_size: mode_size_opt,
498 mtime: mtime_opt,
499 499 fallback_exec,
500 500 fallback_symlink,
501 ) = entry.v2_data();
502 // TODO: convert throug raw flag bits instead?
501 } = entry.v2_data();
502 // TODO: convert through raw flag bits instead?
503 503 let mut flags = Flags::empty();
504 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
504 flags.set(Flags::WDIR_TRACKED, wc_tracked);
505 505 flags.set(Flags::P1_TRACKED, p1_tracked);
506 506 flags.set(Flags::P2_INFO, p2_info);
507 507 let size = if let Some((m, s)) = mode_size_opt {
508 508 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
509 509 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
510 510 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
511 511 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
512 512 flags.insert(Flags::HAS_MODE_AND_SIZE);
513 513 s.into()
514 514 } else {
515 515 0.into()
516 516 };
517 517 let mtime = if let Some(m) = mtime_opt {
518 518 flags.insert(Flags::HAS_MTIME);
519 519 if m.second_ambiguous {
520 520 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
521 521 };
522 522 m.into()
523 523 } else {
524 524 PackedTruncatedTimestamp::null()
525 525 };
526 526 if let Some(f_exec) = fallback_exec {
527 527 flags.insert(Flags::HAS_FALLBACK_EXEC);
528 528 if f_exec {
529 529 flags.insert(Flags::FALLBACK_EXEC);
530 530 }
531 531 }
532 532 if let Some(f_symlink) = fallback_symlink {
533 533 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
534 534 if f_symlink {
535 535 flags.insert(Flags::FALLBACK_SYMLINK);
536 536 }
537 537 }
538 538 (flags, size, mtime)
539 539 }
540 540 }
541 541
542 542 fn read_hg_path(
543 543 on_disk: &[u8],
544 544 slice: PathSlice,
545 545 ) -> Result<&HgPath, DirstateV2ParseError> {
546 546 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
547 547 }
548 548
549 549 fn read_nodes(
550 550 on_disk: &[u8],
551 551 slice: ChildNodes,
552 552 ) -> Result<&[Node], DirstateV2ParseError> {
553 553 read_slice(on_disk, slice.start, slice.len.get())
554 554 }
555 555
556 556 fn read_slice<T, Len>(
557 557 on_disk: &[u8],
558 558 start: Offset,
559 559 len: Len,
560 560 ) -> Result<&[T], DirstateV2ParseError>
561 561 where
562 562 T: BytesCast,
563 563 Len: TryInto<usize>,
564 564 {
565 565 // Either `usize::MAX` would result in "out of bounds" error since a single
566 566 // `&[u8]` cannot occupy the entire addess space.
567 567 let start = start.get().try_into().unwrap_or(std::usize::MAX);
568 568 let len = len.try_into().unwrap_or(std::usize::MAX);
569 569 on_disk
570 570 .get(start..)
571 571 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
572 572 .map(|(slice, _rest)| slice)
573 573 .ok_or_else(|| DirstateV2ParseError)
574 574 }
575 575
576 576 pub(crate) fn for_each_tracked_path<'on_disk>(
577 577 on_disk: &'on_disk [u8],
578 578 metadata: &[u8],
579 579 mut f: impl FnMut(&'on_disk HgPath),
580 580 ) -> Result<(), DirstateV2ParseError> {
581 581 let (meta, _) = TreeMetadata::from_bytes(metadata)
582 582 .map_err(|_| DirstateV2ParseError)?;
583 583 fn recur<'on_disk>(
584 584 on_disk: &'on_disk [u8],
585 585 nodes: ChildNodes,
586 586 f: &mut impl FnMut(&'on_disk HgPath),
587 587 ) -> Result<(), DirstateV2ParseError> {
588 588 for node in read_nodes(on_disk, nodes)? {
589 589 if let Some(entry) = node.entry()? {
590 590 if entry.state().is_tracked() {
591 591 f(node.full_path(on_disk)?)
592 592 }
593 593 }
594 594 recur(on_disk, node.children, f)?
595 595 }
596 596 Ok(())
597 597 }
598 598 recur(on_disk, meta.root_nodes, &mut f)
599 599 }
600 600
601 601 /// Returns new data and metadata, together with whether that data should be
602 602 /// appended to the existing data file whose content is at
603 603 /// `dirstate_map.on_disk` (true), instead of written to a new data file
604 604 /// (false).
605 605 pub(super) fn write(
606 606 dirstate_map: &DirstateMap,
607 607 can_append: bool,
608 608 ) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> {
609 609 let append = can_append && dirstate_map.write_should_append();
610 610
611 611 // This ignores the space for paths, and for nodes without an entry.
612 612 // TODO: better estimate? Skip the `Vec` and write to a file directly?
613 613 let size_guess = std::mem::size_of::<Node>()
614 614 * dirstate_map.nodes_with_entry_count as usize;
615 615
616 616 let mut writer = Writer {
617 617 dirstate_map,
618 618 append,
619 619 out: Vec::with_capacity(size_guess),
620 620 };
621 621
622 622 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
623 623
624 624 let meta = TreeMetadata {
625 625 root_nodes,
626 626 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
627 627 nodes_with_copy_source_count: dirstate_map
628 628 .nodes_with_copy_source_count
629 629 .into(),
630 630 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
631 631 unused: [0; 4],
632 632 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
633 633 };
634 634 Ok((writer.out, meta, append))
635 635 }
636 636
/// State shared by the serialization routines while writing a dirstate map.
struct Writer<'dmap, 'on_disk> {
    /// The map being serialized; its `on_disk` bytes are consulted to reuse
    /// existing data when appending.
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    /// Whether the output is written as a continuation of the existing
    /// data: offsets are then shifted by the length of `on_disk`, and
    /// data already on disk is referenced rather than copied.
    append: bool,
    /// Bytes produced so far.
    out: Vec<u8>,
}
642 642
impl Writer<'_, '_> {
    /// Serialize `nodes` (and, recursively, everything they refer to) into
    /// `self.out`, returning where the serialized nodes are located.
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            // Children and paths are written before the node itself so that
            // the node can record their offsets.
            let children =
                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                // A start offset of zero encodes "no copy source".
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (flags, size, mtime) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => {
                            Node::from_dirstate_entry(entry)
                        }
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            // We currently never set a mtime if unknown
                            // files are present. So if we have a mtime for
                            // a directory, we know there are no unknown
                            // files and we blindly set
                            // ALL_UNKNOWN_RECORDED.
                            //
                            // We never set ALL_IGNORED_RECORDED since we
                            // don't track that case currently.
                            let mut flags = Flags::DIRECTORY
                                | Flags::HAS_MTIME
                                | Flags::ALL_UNKNOWN_RECORDED;
                            if mtime.second_ambiguous {
                                flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
                            }
                            (flags, 0.into(), (*mtime).into())
                        }
                        dirstate_map::NodeData::None => (
                            Flags::DIRECTORY,
                            0.into(),
                            PackedTruncatedTimestamp::null(),
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        flags: flags.bits().into(),
                        size,
                        mtime,
                    }
                }
                // A node that is still on disk keeps all of its fields
                // except the three offsets just recomputed.
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        // Range of addresses from the first byte of `slice` up to and
        // including the address just past its last byte.
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

    /// Offset at which the next byte pushed to `self.out` will be located.
    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        // When appending, `self.out` is positioned after the existing
        // `on_disk` content.
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    /// Make the bytes of a path available in the output and return where
    /// they are located.
    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}
792 792
793 793 fn offset_from_usize(x: usize) -> Offset {
794 794 u32::try_from(x)
795 795 // Could only panic for a dirstate file larger than 4 GiB
796 796 .expect("dirstate-v2 offset overflow")
797 797 .into()
798 798 }
799 799
800 800 fn child_nodes_len_from_usize(x: usize) -> Size {
801 801 u32::try_from(x)
802 802 // Could only panic with over 4 billion nodes
803 803 .expect("dirstate-v2 slice length overflow")
804 804 .into()
805 805 }
806 806
807 807 fn path_len_from_usize(x: usize) -> PathSize {
808 808 u16::try_from(x)
809 809 // Could only panic for paths over 64 KiB
810 810 .expect("dirstate-v2 path length overflow")
811 811 .into()
812 812 }
813 813
814 814 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
815 815 fn from(timestamp: TruncatedTimestamp) -> Self {
816 816 Self {
817 817 truncated_seconds: timestamp.truncated_seconds().into(),
818 818 nanoseconds: timestamp.nanoseconds().into(),
819 819 }
820 820 }
821 821 }
822 822
823 823 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
824 824 type Error = DirstateV2ParseError;
825 825
826 826 fn try_from(
827 827 timestamp: PackedTruncatedTimestamp,
828 828 ) -> Result<Self, Self::Error> {
829 829 Self::from_already_truncated(
830 830 timestamp.truncated_seconds.get(),
831 831 timestamp.nanoseconds.get(),
832 832 false,
833 833 )
834 834 }
835 835 }
impl PackedTruncatedTimestamp {
    /// The all-zero timestamp, written when a node has no meaningful mtime.
    fn null() -> Self {
        Self {
            truncated_seconds: 0.into(),
            nanoseconds: 0.into(),
        }
    }
}
@@ -1,246 +1,247 b''
1 1 use cpython::exc;
2 2 use cpython::ObjectProtocol;
3 3 use cpython::PyBytes;
4 4 use cpython::PyErr;
5 5 use cpython::PyNone;
6 6 use cpython::PyObject;
7 7 use cpython::PyResult;
8 8 use cpython::Python;
9 9 use cpython::PythonObject;
10 10 use hg::dirstate::DirstateEntry;
11 use hg::dirstate::DirstateV2Data;
11 12 use hg::dirstate::TruncatedTimestamp;
12 13 use std::cell::Cell;
13 14
14 15 py_class!(pub class DirstateItem |py| {
15 16 data entry: Cell<DirstateEntry>;
16 17
17 18 def __new__(
18 19 _cls,
19 20 wc_tracked: bool = false,
20 21 p1_tracked: bool = false,
21 22 p2_info: bool = false,
22 23 has_meaningful_data: bool = true,
23 24 has_meaningful_mtime: bool = true,
24 25 parentfiledata: Option<(u32, u32, Option<(u32, u32, bool)>)> = None,
25 26 fallback_exec: Option<bool> = None,
26 27 fallback_symlink: Option<bool> = None,
27 28
28 29 ) -> PyResult<DirstateItem> {
29 30 let mut mode_size_opt = None;
30 31 let mut mtime_opt = None;
31 32 if let Some((mode, size, mtime)) = parentfiledata {
32 33 if has_meaningful_data {
33 34 mode_size_opt = Some((mode, size))
34 35 }
35 36 if has_meaningful_mtime {
36 37 if let Some(m) = mtime {
37 38 mtime_opt = Some(timestamp(py, m)?);
38 39 }
39 40 }
40 41 }
41 let entry = DirstateEntry::from_v2_data(
42 wc_tracked,
42 let entry = DirstateEntry::from_v2_data(DirstateV2Data {
43 wc_tracked: wc_tracked,
43 44 p1_tracked,
44 45 p2_info,
45 mode_size_opt,
46 mtime_opt,
46 mode_size: mode_size_opt,
47 mtime: mtime_opt,
47 48 fallback_exec,
48 49 fallback_symlink,
49 );
50 });
50 51 DirstateItem::create_instance(py, Cell::new(entry))
51 52 }
52 53
53 54 @property
54 55 def state(&self) -> PyResult<PyBytes> {
55 56 let state_byte: u8 = self.entry(py).get().state().into();
56 57 Ok(PyBytes::new(py, &[state_byte]))
57 58 }
58 59
59 60 @property
60 61 def mode(&self) -> PyResult<i32> {
61 62 Ok(self.entry(py).get().mode())
62 63 }
63 64
64 65 @property
65 66 def size(&self) -> PyResult<i32> {
66 67 Ok(self.entry(py).get().size())
67 68 }
68 69
69 70 @property
70 71 def mtime(&self) -> PyResult<i32> {
71 72 Ok(self.entry(py).get().mtime())
72 73 }
73 74
74 75 @property
75 76 def has_fallback_exec(&self) -> PyResult<bool> {
76 77 match self.entry(py).get().get_fallback_exec() {
77 78 Some(_) => Ok(true),
78 79 None => Ok(false),
79 80 }
80 81 }
81 82
82 83 @property
83 84 def fallback_exec(&self) -> PyResult<Option<bool>> {
84 85 match self.entry(py).get().get_fallback_exec() {
85 86 Some(exec) => Ok(Some(exec)),
86 87 None => Ok(None),
87 88 }
88 89 }
89 90
90 91 @fallback_exec.setter
91 92 def set_fallback_exec(&self, value: Option<PyObject>) -> PyResult<()> {
92 93 match value {
93 94 None => {self.entry(py).get().set_fallback_exec(None);},
94 95 Some(value) => {
95 96 if value.is_none(py) {
96 97 self.entry(py).get().set_fallback_exec(None);
97 98 } else {
98 99 self.entry(py).get().set_fallback_exec(
99 100 Some(value.is_true(py)?)
100 101 );
101 102 }},
102 103 }
103 104 Ok(())
104 105 }
105 106
106 107 @property
107 108 def has_fallback_symlink(&self) -> PyResult<bool> {
108 109 match self.entry(py).get().get_fallback_symlink() {
109 110 Some(_) => Ok(true),
110 111 None => Ok(false),
111 112 }
112 113 }
113 114
114 115 @property
115 116 def fallback_symlink(&self) -> PyResult<Option<bool>> {
116 117 match self.entry(py).get().get_fallback_symlink() {
117 118 Some(symlink) => Ok(Some(symlink)),
118 119 None => Ok(None),
119 120 }
120 121 }
121 122
122 123 @fallback_symlink.setter
123 124 def set_fallback_symlink(&self, value: Option<PyObject>) -> PyResult<()> {
124 125 match value {
125 126 None => {self.entry(py).get().set_fallback_symlink(None);},
126 127 Some(value) => {
127 128 if value.is_none(py) {
128 129 self.entry(py).get().set_fallback_symlink(None);
129 130 } else {
130 131 self.entry(py).get().set_fallback_symlink(
131 132 Some(value.is_true(py)?)
132 133 );
133 134 }},
134 135 }
135 136 Ok(())
136 137 }
137 138
138 139 @property
139 140 def tracked(&self) -> PyResult<bool> {
140 141 Ok(self.entry(py).get().tracked())
141 142 }
142 143
143 144 @property
144 145 def p1_tracked(&self) -> PyResult<bool> {
145 146 Ok(self.entry(py).get().p1_tracked())
146 147 }
147 148
148 149 @property
149 150 def added(&self) -> PyResult<bool> {
150 151 Ok(self.entry(py).get().added())
151 152 }
152 153
153 154
154 155 @property
155 156 def p2_info(&self) -> PyResult<bool> {
156 157 Ok(self.entry(py).get().p2_info())
157 158 }
158 159
159 160 @property
160 161 def removed(&self) -> PyResult<bool> {
161 162 Ok(self.entry(py).get().removed())
162 163 }
163 164
164 165 @property
165 166 def maybe_clean(&self) -> PyResult<bool> {
166 167 Ok(self.entry(py).get().maybe_clean())
167 168 }
168 169
169 170 @property
170 171 def any_tracked(&self) -> PyResult<bool> {
171 172 Ok(self.entry(py).get().any_tracked())
172 173 }
173 174
174 175 def mtime_likely_equal_to(&self, other: (u32, u32, bool))
175 176 -> PyResult<bool> {
176 177 if let Some(mtime) = self.entry(py).get().truncated_mtime() {
177 178 Ok(mtime.likely_equal(timestamp(py, other)?))
178 179 } else {
179 180 Ok(false)
180 181 }
181 182 }
182 183
183 184 def drop_merge_data(&self) -> PyResult<PyNone> {
184 185 self.update(py, |entry| entry.drop_merge_data());
185 186 Ok(PyNone)
186 187 }
187 188
188 189 def set_clean(
189 190 &self,
190 191 mode: u32,
191 192 size: u32,
192 193 mtime: (u32, u32, bool),
193 194 ) -> PyResult<PyNone> {
194 195 let mtime = timestamp(py, mtime)?;
195 196 self.update(py, |entry| entry.set_clean(mode, size, mtime));
196 197 Ok(PyNone)
197 198 }
198 199
199 200 def set_possibly_dirty(&self) -> PyResult<PyNone> {
200 201 self.update(py, |entry| entry.set_possibly_dirty());
201 202 Ok(PyNone)
202 203 }
203 204
204 205 def set_tracked(&self) -> PyResult<PyNone> {
205 206 self.update(py, |entry| entry.set_tracked());
206 207 Ok(PyNone)
207 208 }
208 209
209 210 def set_untracked(&self) -> PyResult<PyNone> {
210 211 self.update(py, |entry| entry.set_untracked());
211 212 Ok(PyNone)
212 213 }
213 214 });
214 215
215 216 impl DirstateItem {
216 217 pub fn new_as_pyobject(
217 218 py: Python<'_>,
218 219 entry: DirstateEntry,
219 220 ) -> PyResult<PyObject> {
220 221 Ok(DirstateItem::create_instance(py, Cell::new(entry))?.into_object())
221 222 }
222 223
223 224 pub fn get_entry(&self, py: Python<'_>) -> DirstateEntry {
224 225 self.entry(py).get()
225 226 }
226 227
227 228 // TODO: Use https://doc.rust-lang.org/std/cell/struct.Cell.html#method.update instead when it’s stable
228 229 pub fn update(&self, py: Python<'_>, f: impl FnOnce(&mut DirstateEntry)) {
229 230 let mut entry = self.entry(py).get();
230 231 f(&mut entry);
231 232 self.entry(py).set(entry)
232 233 }
233 234 }
234 235
235 236 pub(crate) fn timestamp(
236 237 py: Python<'_>,
237 238 (s, ns, second_ambiguous): (u32, u32, bool),
238 239 ) -> PyResult<TruncatedTimestamp> {
239 240 TruncatedTimestamp::from_already_truncated(s, ns, second_ambiguous)
240 241 .map_err(|_| {
241 242 PyErr::new::<exc::ValueError, _>(
242 243 py,
243 244 "expected mtime truncated to 31 bits",
244 245 )
245 246 })
246 247 }
General Comments 0
You need to be logged in to leave comments. Login now