##// END OF EJS Templates
rust: fix building on macOS (issue6801)...
Dan Villiom Podlaski Christiansen -
r51182:0cc19a53 stable
parent child Browse files
Show More
@@ -1,865 +1,865 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 7 use crate::dirstate_tree::dirstate_map::{
8 8 self, DirstateMap, DirstateMapWriteMode, NodeRef,
9 9 };
10 10 use crate::dirstate_tree::path_with_basename::WithBasename;
11 11 use crate::errors::HgError;
12 12 use crate::utils::hg_path::HgPath;
13 13 use crate::DirstateEntry;
14 14 use crate::DirstateError;
15 15 use crate::DirstateParents;
16 16 use bitflags::bitflags;
17 17 use bytes_cast::unaligned::{U16Be, U32Be};
18 18 use bytes_cast::BytesCast;
19 19 use format_bytes::format_bytes;
20 20 use rand::Rng;
21 21 use std::borrow::Cow;
22 22 use std::fmt::Write;
23 23
24 24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 25 /// This is a redundant sanity check more than an actual "magic number" since
26 26 /// `.hg/requires` already governs which format should be used.
27 27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28 28
29 29 /// Keep space for 256-bit hashes
30 30 const STORED_NODE_ID_BYTES: usize = 32;
31 31
32 32 /// … even though only 160 bits are used for now, with SHA-1
33 33 const USED_NODE_ID_BYTES: usize = 20;
34 34
35 35 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
36 36 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
37 37
38 38 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
39 39 const TREE_METADATA_SIZE: usize = 44;
40 40 const NODE_SIZE: usize = 44;
41 41
42 42 /// Make sure that size-affecting changes are made knowingly
43 43 #[allow(unused)]
44 44 fn static_assert_size_of() {
45 45 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
46 46 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
47 47 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
48 48 }
49 49
50 50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
// Fixed-size prefix of the docket file. `read_docket` casts the start of the
// file to this struct and treats the bytes that follow as the data-file uuid.
51 51 #[derive(BytesCast)]
52 52 #[repr(C)]
53 53 struct DocketHeader {
/// Must equal `V2_FORMAT_MARKER`; checked by `read_docket`.
54 54 marker: [u8; V2_FORMAT_MARKER.len()],
/// Parent node ids, padded to `STORED_NODE_ID_BYTES` by `Docket::serialize`.
/// `Docket::parents` only reads back the first `USED_NODE_ID_BYTES` bytes.
55 55 parent_1: [u8; STORED_NODE_ID_BYTES],
56 56 parent_2: [u8; STORED_NODE_ID_BYTES],
57 57
58 58 metadata: TreeMetadata,
59 59
60 60 /// Counted in bytes
61 61 data_size: Size,
62 62
/// Length in bytes of the uuid stored right after this header;
/// `read_docket` rejects the docket if the trailing bytes have another length.
63 63 uuid_size: u8,
64 64 }
65 65
/// Parsed view of a docket file, borrowing from the bytes handed to
/// `read_docket`.
66 66 pub struct Docket<'on_disk> {
67 67 header: &'on_disk DocketHeader,
/// The bytes following the header; `data_filename` appends them to
/// `dirstate.` to name the data file.
68 68 pub uuid: &'on_disk [u8],
69 69 }
70 70
71 71 /// Fields are documented in the *Tree metadata in the docket file*
72 72 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
73 73 #[derive(BytesCast)]
74 74 #[repr(C)]
75 75 pub struct TreeMetadata {
/// Location of the root-level nodes in the data file (see `read`).
76 76 root_nodes: ChildNodes,
77 77 nodes_with_entry_count: Size,
78 78 nodes_with_copy_source_count: Size,
/// Written as 0 by `write` whenever a new data file is created instead of
/// appending to the existing one.
79 79 unreachable_bytes: Size,
/// Always written as zeros by `write`.
80 80 unused: [u8; 4],
81 81
82 82 /// See *Optional hash of ignore patterns* section of
83 83 /// `mercurial/helptext/internals/dirstate-v2.txt`
84 84 ignore_patterns_hash: IgnorePatternsHash,
85 85 }
86 86
87 87 /// Fields are documented in the *The data file format*
88 88 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
89 89 #[derive(BytesCast, Debug)]
90 90 #[repr(C)]
91 91 pub(super) struct Node {
/// Where the full path bytes live in the data file.
92 92 full_path: PathSlice,
93 93
94 94 /// In bytes from `self.full_path.start`
95 95 base_name_start: PathSize,
96 96
/// A `start` of 0 means "no copy source" (see `has_copy_source`).
97 97 copy_source: OptPathSlice,
/// Where this node's child nodes live in the data file.
98 98 children: ChildNodes,
99 99 pub(super) descendants_with_entry_count: Size,
100 100 pub(super) tracked_descendants_count: Size,
/// Raw bits, decoded into `Flags` by `Node::flags`.
101 101 flags: U16Be,
/// Only used when `Flags::HAS_MODE_AND_SIZE` is set (see `assume_entry`).
102 102 size: U32Be,
/// Only used when `Flags::HAS_MTIME` is set.
103 103 mtime: PackedTruncatedTimestamp,
104 104 }
105 105
// Meaning of the bits of `Node::flags`. `Node::flags()` decodes the stored
// `u16` with `Flags::from_bits_truncate`, and `Writer::write_nodes` stores
// `Flags::bits()` back.
106 106 bitflags! {
107 107 #[repr(C)]
108 108 struct Flags: u16 {
109 109 const WDIR_TRACKED = 1 << 0;
110 110 const P1_TRACKED = 1 << 1;
111 111 const P2_INFO = 1 << 2;
112 112 const MODE_EXEC_PERM = 1 << 3;
113 113 const MODE_IS_SYMLINK = 1 << 4;
114 114 const HAS_FALLBACK_EXEC = 1 << 5;
115 115 const FALLBACK_EXEC = 1 << 6;
116 116 const HAS_FALLBACK_SYMLINK = 1 << 7;
117 117 const FALLBACK_SYMLINK = 1 << 8;
118 118 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
119 119 const HAS_MODE_AND_SIZE = 1 <<10;
120 120 const HAS_MTIME = 1 <<11;
121 121 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
122 122 const DIRECTORY = 1 <<13;
123 123 const ALL_UNKNOWN_RECORDED = 1 <<14;
124 124 const ALL_IGNORED_RECORDED = 1 <<15;
125 125 }
126 126 }
127 127
128 128 /// Duration since the Unix epoch
///
/// On-disk form of `TruncatedTimestamp`; converted with the `From` and
/// `TryFrom` impls in this file. The "second ambiguous" marker is not stored
/// here but in `Flags::MTIME_SECOND_AMBIGUOUS`.
129 129 #[derive(BytesCast, Copy, Clone, Debug)]
130 130 #[repr(C)]
131 131 struct PackedTruncatedTimestamp {
132 132 truncated_seconds: U32Be,
133 133 nanoseconds: U32Be,
134 134 }
135 135
136 136 /// Counted in bytes from the start of the file
137 137 ///
138 138 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
139 139 type Offset = U32Be;
140 140
141 141 /// Counted in number of items
142 142 ///
143 143 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
144 144 type Size = U32Be;
145 145
146 146 /// Counted in bytes
147 147 ///
148 148 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
149 149 type PathSize = U16Be;
150 150
151 151 /// A contiguous sequence of `len` times `Node`, representing the child nodes
152 152 /// of either some other node or of the repository root.
153 153 ///
154 154 /// Always sorted by ascending `full_path`, to allow binary search.
155 155 /// Since nodes with the same parent nodes also have the same parent path,
156 156 /// only the `base_name`s need to be compared during binary search.
157 157 #[derive(BytesCast, Copy, Clone, Debug)]
158 158 #[repr(C)]
159 159 struct ChildNodes {
/// Byte offset of the first node within the data file.
160 160 start: Offset,
/// Number of nodes (not bytes).
161 161 len: Size,
162 162 }
163 163
164 164 /// A `HgPath` of `len` bytes
165 165 #[derive(BytesCast, Copy, Clone, Debug)]
166 166 #[repr(C)]
167 167 struct PathSlice {
/// Byte offset of the path within the data file.
168 168 start: Offset,
/// Length of the path in bytes.
169 169 len: PathSize,
170 170 }
171 171
172 172 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
173 173 type OptPathSlice = PathSlice;
174 174
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    /// Human-readable description of what could not be parsed.
    message: String,
}

impl DirstateV2ParseError {
    /// Builds an error from anything convertible into a `String`.
    pub fn new<S: Into<String>>(message: S) -> Self {
        let message = message.into();
        Self { message }
    }
}
190 190
191 191 impl From<DirstateV2ParseError> for HgError {
192 192 fn from(e: DirstateV2ParseError) -> Self {
193 193 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
194 194 }
195 195 }
196 196
197 197 impl From<DirstateV2ParseError> for crate::DirstateError {
198 198 fn from(error: DirstateV2ParseError) -> Self {
199 199 HgError::from(error).into()
200 200 }
201 201 }
202 202
203 203 impl TreeMetadata {
204 204 pub fn as_bytes(&self) -> &[u8] {
205 205 BytesCast::as_bytes(self)
206 206 }
207 207 }
208 208
209 209 impl<'on_disk> Docket<'on_disk> {
210 210 /// Generate the identifier for a new data file
211 211 ///
212 212 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
213 213 /// See `mercurial/revlogutils/docket.py`
214 214 pub fn new_uid() -> String {
215 215 const ID_LENGTH: usize = 8;
216 216 let mut id = String::with_capacity(ID_LENGTH);
217 217 let mut rng = rand::thread_rng();
218 218 for _ in 0..ID_LENGTH {
219 219 // One random hexadecimal digit.
220 220 // `unwrap` never panics because `impl Write for String`
221 221 // never returns an error.
222 222 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
223 223 }
224 224 id
225 225 }
226 226
227 227 pub fn serialize(
228 228 parents: DirstateParents,
229 229 tree_metadata: TreeMetadata,
230 230 data_size: u64,
231 231 uuid: &[u8],
232 232 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
233 233 let header = DocketHeader {
234 234 marker: *V2_FORMAT_MARKER,
235 235 parent_1: parents.p1.pad_to_256_bits(),
236 236 parent_2: parents.p2.pad_to_256_bits(),
237 237 metadata: tree_metadata,
238 238 data_size: u32::try_from(data_size)?.into(),
239 239 uuid_size: uuid.len().try_into()?,
240 240 };
241 241 let header = header.as_bytes();
242 242 let mut docket = Vec::with_capacity(header.len() + uuid.len());
243 243 docket.extend_from_slice(header);
244 244 docket.extend_from_slice(uuid);
245 245 Ok(docket)
246 246 }
247 247
248 248 pub fn parents(&self) -> DirstateParents {
249 249 use crate::Node;
250 250 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
251 251 .unwrap();
252 252 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
253 253 .unwrap();
254 254 DirstateParents { p1, p2 }
255 255 }
256 256
257 257 pub fn tree_metadata(&self) -> &[u8] {
258 258 self.header.metadata.as_bytes()
259 259 }
260 260
261 261 pub fn data_size(&self) -> usize {
262 262 // This `unwrap` could only panic on a 16-bit CPU
263 263 self.header.data_size.get().try_into().unwrap()
264 264 }
265 265
266 266 pub fn data_filename(&self) -> String {
267 267 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
268 268 }
269 269 }
270 270
271 271 pub fn read_docket(
272 272 on_disk: &[u8],
273 273 ) -> Result<Docket<'_>, DirstateV2ParseError> {
274 274 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
275 275 DirstateV2ParseError::new(format!("when reading docket, {}", e))
276 276 })?;
277 277 let uuid_size = header.uuid_size as usize;
278 278 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
279 279 Ok(Docket { header, uuid })
280 280 } else {
281 281 Err(DirstateV2ParseError::new(
282 282 "invalid format marker or uuid size",
283 283 ))
284 284 }
285 285 }
286 286
287 287 pub(super) fn read<'on_disk>(
288 288 on_disk: &'on_disk [u8],
289 289 metadata: &[u8],
290 290 uuid: Vec<u8>,
291 291 identity: Option<u64>,
292 292 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
293 293 if on_disk.is_empty() {
294 294 let mut map = DirstateMap::empty(on_disk);
295 295 map.dirstate_version = DirstateVersion::V2;
296 296 return Ok(map);
297 297 }
298 298 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
299 299 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
300 300 })?;
301 301 let dirstate_map = DirstateMap {
302 302 on_disk,
303 303 root: dirstate_map::ChildNodes::OnDisk(
304 304 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
305 305 e.message = format!("{}, when reading root notes", e.message);
306 306 e
307 307 })?,
308 308 ),
309 309 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
310 310 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
311 311 ignore_patterns_hash: meta.ignore_patterns_hash,
312 312 unreachable_bytes: meta.unreachable_bytes.get(),
313 313 old_data_size: on_disk.len(),
314 314 old_uuid: Some(uuid),
315 315 identity,
316 316 dirstate_version: DirstateVersion::V2,
317 317 write_mode: DirstateMapWriteMode::Auto,
318 318 };
319 319 Ok(dirstate_map)
320 320 }
321 321
322 322 impl Node {
323 323 pub(super) fn full_path<'on_disk>(
324 324 &self,
325 325 on_disk: &'on_disk [u8],
326 326 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
327 327 read_hg_path(on_disk, self.full_path)
328 328 }
329 329
330 330 pub(super) fn base_name_start(
331 331 &self,
332 332 ) -> Result<usize, DirstateV2ParseError> {
333 333 let start = self.base_name_start.get();
334 334 if start < self.full_path.len.get() {
335 335 let start = usize::try_from(start)
336 336 // u32 -> usize, could only panic on a 16-bit CPU
337 337 .expect("dirstate-v2 base_name_start out of bounds");
338 338 Ok(start)
339 339 } else {
340 340 Err(DirstateV2ParseError::new("not enough bytes for base name"))
341 341 }
342 342 }
343 343
344 344 pub(super) fn base_name<'on_disk>(
345 345 &self,
346 346 on_disk: &'on_disk [u8],
347 347 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
348 348 let full_path = self.full_path(on_disk)?;
349 349 let base_name_start = self.base_name_start()?;
350 350 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
351 351 }
352 352
353 353 pub(super) fn path<'on_disk>(
354 354 &self,
355 355 on_disk: &'on_disk [u8],
356 356 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
357 357 Ok(WithBasename::from_raw_parts(
358 358 Cow::Borrowed(self.full_path(on_disk)?),
359 359 self.base_name_start()?,
360 360 ))
361 361 }
362 362
363 363 pub(super) fn has_copy_source(&self) -> bool {
364 364 self.copy_source.start.get() != 0
365 365 }
366 366
367 367 pub(super) fn copy_source<'on_disk>(
368 368 &self,
369 369 on_disk: &'on_disk [u8],
370 370 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
371 371 Ok(if self.has_copy_source() {
372 372 Some(read_hg_path(on_disk, self.copy_source)?)
373 373 } else {
374 374 None
375 375 })
376 376 }
377 377
378 378 fn flags(&self) -> Flags {
379 379 Flags::from_bits_truncate(self.flags.get())
380 380 }
381 381
382 382 fn has_entry(&self) -> bool {
383 383 self.flags().intersects(
384 384 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
385 385 )
386 386 }
387 387
388 388 pub(super) fn node_data(
389 389 &self,
390 390 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
391 391 if self.has_entry() {
392 392 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
393 393 } else if let Some(mtime) = self.cached_directory_mtime()? {
394 394 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
395 395 } else {
396 396 Ok(dirstate_map::NodeData::None)
397 397 }
398 398 }
399 399
400 400 pub(super) fn cached_directory_mtime(
401 401 &self,
402 402 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
403 403 // For now we do not have code to handle the absence of
404 404 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
405 405 // unset.
406 406 if self.flags().contains(Flags::DIRECTORY)
407 407 && self.flags().contains(Flags::HAS_MTIME)
408 408 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
409 409 {
410 410 Ok(Some(self.mtime()?))
411 411 } else {
412 412 Ok(None)
413 413 }
414 414 }
415 415
416 416 fn synthesize_unix_mode(&self) -> u32 {
417 417 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
418 libc::S_IFLNK
418 libc::S_IFLNK as u32
419 419 } else {
420 libc::S_IFREG
420 libc::S_IFREG as u32
421 421 };
422 422 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
423 423 0o755
424 424 } else {
425 425 0o644
426 426 };
427 427 file_type | permissions
428 428 }
429 429
430 430 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
431 431 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
432 432 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
433 433 m.second_ambiguous = true;
434 434 }
435 435 Ok(m)
436 436 }
437 437
438 438 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
439 439 // TODO: convert through raw bits instead?
440 440 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
441 441 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
442 442 let p2_info = self.flags().contains(Flags::P2_INFO);
443 443 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
444 444 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
445 445 {
446 446 Some((self.synthesize_unix_mode(), self.size.into()))
447 447 } else {
448 448 None
449 449 };
450 450 let mtime = if self.flags().contains(Flags::HAS_MTIME)
451 451 && !self.flags().contains(Flags::DIRECTORY)
452 452 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
453 453 {
454 454 Some(self.mtime()?)
455 455 } else {
456 456 None
457 457 };
458 458 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
459 459 {
460 460 Some(self.flags().contains(Flags::FALLBACK_EXEC))
461 461 } else {
462 462 None
463 463 };
464 464 let fallback_symlink =
465 465 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
466 466 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
467 467 } else {
468 468 None
469 469 };
470 470 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
471 471 wc_tracked,
472 472 p1_tracked,
473 473 p2_info,
474 474 mode_size,
475 475 mtime,
476 476 fallback_exec,
477 477 fallback_symlink,
478 478 }))
479 479 }
480 480
481 481 pub(super) fn entry(
482 482 &self,
483 483 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
484 484 if self.has_entry() {
485 485 Ok(Some(self.assume_entry()?))
486 486 } else {
487 487 Ok(None)
488 488 }
489 489 }
490 490
491 491 pub(super) fn children<'on_disk>(
492 492 &self,
493 493 on_disk: &'on_disk [u8],
494 494 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
495 495 read_nodes(on_disk, self.children)
496 496 }
497 497
498 498 pub(super) fn to_in_memory_node<'on_disk>(
499 499 &self,
500 500 on_disk: &'on_disk [u8],
501 501 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
502 502 Ok(dirstate_map::Node {
503 503 children: dirstate_map::ChildNodes::OnDisk(
504 504 self.children(on_disk)?,
505 505 ),
506 506 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
507 507 data: self.node_data()?,
508 508 descendants_with_entry_count: self
509 509 .descendants_with_entry_count
510 510 .get(),
511 511 tracked_descendants_count: self.tracked_descendants_count.get(),
512 512 })
513 513 }
514 514
515 515 fn from_dirstate_entry(
516 516 entry: &DirstateEntry,
517 517 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
518 518 let DirstateV2Data {
519 519 wc_tracked,
520 520 p1_tracked,
521 521 p2_info,
522 522 mode_size: mode_size_opt,
523 523 mtime: mtime_opt,
524 524 fallback_exec,
525 525 fallback_symlink,
526 526 } = entry.v2_data();
527 527 // TODO: convert through raw flag bits instead?
528 528 let mut flags = Flags::empty();
529 529 flags.set(Flags::WDIR_TRACKED, wc_tracked);
530 530 flags.set(Flags::P1_TRACKED, p1_tracked);
531 531 flags.set(Flags::P2_INFO, p2_info);
532 532 let size = if let Some((m, s)) = mode_size_opt {
533 533 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
534 534 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
535 535 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
536 536 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
537 537 flags.insert(Flags::HAS_MODE_AND_SIZE);
538 538 s.into()
539 539 } else {
540 540 0.into()
541 541 };
542 542 let mtime = if let Some(m) = mtime_opt {
543 543 flags.insert(Flags::HAS_MTIME);
544 544 if m.second_ambiguous {
545 545 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
546 546 };
547 547 m.into()
548 548 } else {
549 549 PackedTruncatedTimestamp::null()
550 550 };
551 551 if let Some(f_exec) = fallback_exec {
552 552 flags.insert(Flags::HAS_FALLBACK_EXEC);
553 553 if f_exec {
554 554 flags.insert(Flags::FALLBACK_EXEC);
555 555 }
556 556 }
557 557 if let Some(f_symlink) = fallback_symlink {
558 558 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
559 559 if f_symlink {
560 560 flags.insert(Flags::FALLBACK_SYMLINK);
561 561 }
562 562 }
563 563 (flags, size, mtime)
564 564 }
565 565 }
566 566
567 567 fn read_hg_path(
568 568 on_disk: &[u8],
569 569 slice: PathSlice,
570 570 ) -> Result<&HgPath, DirstateV2ParseError> {
571 571 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
572 572 }
573 573
574 574 fn read_nodes(
575 575 on_disk: &[u8],
576 576 slice: ChildNodes,
577 577 ) -> Result<&[Node], DirstateV2ParseError> {
578 578 read_slice(on_disk, slice.start, slice.len.get())
579 579 }
580 580
581 581 fn read_slice<T, Len>(
582 582 on_disk: &[u8],
583 583 start: Offset,
584 584 len: Len,
585 585 ) -> Result<&[T], DirstateV2ParseError>
586 586 where
587 587 T: BytesCast,
588 588 Len: TryInto<usize>,
589 589 {
590 590 // Either `usize::MAX` would result in "out of bounds" error since a single
591 591 // `&[u8]` cannot occupy the entire addess space.
592 592 let start = start.get().try_into().unwrap_or(std::usize::MAX);
593 593 let len = len.try_into().unwrap_or(std::usize::MAX);
594 594 let bytes = match on_disk.get(start..) {
595 595 Some(bytes) => bytes,
596 596 None => {
597 597 return Err(DirstateV2ParseError::new(
598 598 "not enough bytes from disk",
599 599 ))
600 600 }
601 601 };
602 602 T::slice_from_bytes(bytes, len)
603 603 .map_err(|e| {
604 604 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
605 605 })
606 606 .map(|(slice, _rest)| slice)
607 607 }
608 608
609 609 /// Returns new data and metadata, together with whether that data should be
610 610 /// appended to the existing data file whose content is at
611 611 /// `dirstate_map.on_disk` (true), instead of written to a new data file
612 612 /// (false), and the previous size of data on disk.
613 613 pub(super) fn write(
614 614 dirstate_map: &DirstateMap,
615 615 write_mode: DirstateMapWriteMode,
616 616 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
617 617 let append = match write_mode {
618 618 DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
619 619 DirstateMapWriteMode::ForceNewDataFile => false,
620 620 DirstateMapWriteMode::ForceAppend => true,
621 621 };
622 622 if append {
623 623 log::trace!("appending to the dirstate data file");
624 624 } else {
625 625 log::trace!("creating new dirstate data file");
626 626 }
627 627
628 628 // This ignores the space for paths, and for nodes without an entry.
629 629 // TODO: better estimate? Skip the `Vec` and write to a file directly?
630 630 let size_guess = std::mem::size_of::<Node>()
631 631 * dirstate_map.nodes_with_entry_count as usize;
632 632
633 633 let mut writer = Writer {
634 634 dirstate_map,
635 635 append,
636 636 out: Vec::with_capacity(size_guess),
637 637 };
638 638
639 639 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
640 640
641 641 let unreachable_bytes = if append {
642 642 dirstate_map.unreachable_bytes
643 643 } else {
644 644 0
645 645 };
646 646 let meta = TreeMetadata {
647 647 root_nodes,
648 648 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
649 649 nodes_with_copy_source_count: dirstate_map
650 650 .nodes_with_copy_source_count
651 651 .into(),
652 652 unreachable_bytes: unreachable_bytes.into(),
653 653 unused: [0; 4],
654 654 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
655 655 };
656 656 Ok((writer.out, meta, append, dirstate_map.old_data_size))
657 657 }
658 658
// State threaded through the recursive serialization done by `write`.
659 659 struct Writer<'dmap, 'on_disk> {
// The map being serialized.
660 660 dirstate_map: &'dmap DirstateMap<'on_disk>,
// Whether `out` will be appended after the existing `on_disk` data. If so,
// offsets are shifted by `on_disk.len()` and already-written nodes and
// paths are referenced in place instead of being written again.
661 661 append: bool,
// The bytes produced so far.
662 662 out: Vec<u8>,
663 663 }
664 664
665 665 impl Writer<'_, '_> {
666 666 fn write_nodes(
667 667 &mut self,
668 668 nodes: dirstate_map::ChildNodesRef,
669 669 ) -> Result<ChildNodes, DirstateError> {
670 670 // Reuse already-written nodes if possible
671 671 if self.append {
672 672 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
673 673 let start = self.on_disk_offset_of(nodes_slice).expect(
674 674 "dirstate-v2 OnDisk nodes not found within on_disk",
675 675 );
676 676 let len = child_nodes_len_from_usize(nodes_slice.len());
677 677 return Ok(ChildNodes { start, len });
678 678 }
679 679 }
680 680
681 681 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
682 682 // undefined iteration order. Sort to enable binary search in the
683 683 // written file.
684 684 let nodes = nodes.sorted();
685 685 let nodes_len = nodes.len();
686 686
687 687 // First accumulate serialized nodes in a `Vec`
688 688 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
689 689 for node in nodes {
690 690 let children =
691 691 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
692 692 let full_path = node.full_path(self.dirstate_map.on_disk)?;
693 693 let full_path = self.write_path(full_path.as_bytes());
694 694 let copy_source = if let Some(source) =
695 695 node.copy_source(self.dirstate_map.on_disk)?
696 696 {
697 697 self.write_path(source.as_bytes())
698 698 } else {
699 699 PathSlice {
700 700 start: 0.into(),
701 701 len: 0.into(),
702 702 }
703 703 };
704 704 on_disk_nodes.push(match node {
705 705 NodeRef::InMemory(path, node) => {
706 706 let (flags, size, mtime) = match &node.data {
707 707 dirstate_map::NodeData::Entry(entry) => {
708 708 Node::from_dirstate_entry(entry)
709 709 }
710 710 dirstate_map::NodeData::CachedDirectory { mtime } => {
711 711 // we currently never set a mtime if unknown file
712 712 // are present.
713 713 // So if we have a mtime for a directory, we know
714 714 // they are no unknown
715 715 // files and we
716 716 // blindly set ALL_UNKNOWN_RECORDED.
717 717 //
718 718 // We never set ALL_IGNORED_RECORDED since we
719 719 // don't track that case
720 720 // currently.
721 721 let mut flags = Flags::DIRECTORY
722 722 | Flags::HAS_MTIME
723 723 | Flags::ALL_UNKNOWN_RECORDED;
724 724 if mtime.second_ambiguous {
725 725 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
726 726 }
727 727 (flags, 0.into(), (*mtime).into())
728 728 }
729 729 dirstate_map::NodeData::None => (
730 730 Flags::DIRECTORY,
731 731 0.into(),
732 732 PackedTruncatedTimestamp::null(),
733 733 ),
734 734 };
735 735 Node {
736 736 children,
737 737 copy_source,
738 738 full_path,
739 739 base_name_start: u16::try_from(path.base_name_start())
740 740 // Could only panic for paths over 64 KiB
741 741 .expect("dirstate-v2 path length overflow")
742 742 .into(),
743 743 descendants_with_entry_count: node
744 744 .descendants_with_entry_count
745 745 .into(),
746 746 tracked_descendants_count: node
747 747 .tracked_descendants_count
748 748 .into(),
749 749 flags: flags.bits().into(),
750 750 size,
751 751 mtime,
752 752 }
753 753 }
754 754 NodeRef::OnDisk(node) => Node {
755 755 children,
756 756 copy_source,
757 757 full_path,
758 758 ..*node
759 759 },
760 760 })
761 761 }
762 762 // … so we can write them contiguously, after writing everything else
763 763 // they refer to.
764 764 let start = self.current_offset();
765 765 let len = child_nodes_len_from_usize(nodes_len);
766 766 self.out.extend(on_disk_nodes.as_bytes());
767 767 Ok(ChildNodes { start, len })
768 768 }
769 769
770 770 /// If the given slice of items is within `on_disk`, returns its offset
771 771 /// from the start of `on_disk`.
772 772 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
773 773 where
774 774 T: BytesCast,
775 775 {
776 776 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
777 777 let start = slice.as_ptr() as usize;
778 778 let end = start + slice.len();
779 779 start..=end
780 780 }
781 781 let slice_addresses = address_range(slice.as_bytes());
782 782 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
783 783 if on_disk_addresses.contains(slice_addresses.start())
784 784 && on_disk_addresses.contains(slice_addresses.end())
785 785 {
786 786 let offset = slice_addresses.start() - on_disk_addresses.start();
787 787 Some(offset_from_usize(offset))
788 788 } else {
789 789 None
790 790 }
791 791 }
792 792
793 793 fn current_offset(&mut self) -> Offset {
794 794 let mut offset = self.out.len();
795 795 if self.append {
796 796 offset += self.dirstate_map.on_disk.len()
797 797 }
798 798 offset_from_usize(offset)
799 799 }
800 800
801 801 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
802 802 let len = path_len_from_usize(slice.len());
803 803 // Reuse an already-written path if possible
804 804 if self.append {
805 805 if let Some(start) = self.on_disk_offset_of(slice) {
806 806 return PathSlice { start, len };
807 807 }
808 808 }
809 809 let start = self.current_offset();
810 810 self.out.extend(slice.as_bytes());
811 811 PathSlice { start, len }
812 812 }
813 813 }
814 814
815 815 fn offset_from_usize(x: usize) -> Offset {
816 816 u32::try_from(x)
817 817 // Could only panic for a dirstate file larger than 4 GiB
818 818 .expect("dirstate-v2 offset overflow")
819 819 .into()
820 820 }
821 821
822 822 fn child_nodes_len_from_usize(x: usize) -> Size {
823 823 u32::try_from(x)
824 824 // Could only panic with over 4 billion nodes
825 825 .expect("dirstate-v2 slice length overflow")
826 826 .into()
827 827 }
828 828
829 829 fn path_len_from_usize(x: usize) -> PathSize {
830 830 u16::try_from(x)
831 831 // Could only panic for paths over 64 KiB
832 832 .expect("dirstate-v2 path length overflow")
833 833 .into()
834 834 }
835 835
836 836 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
837 837 fn from(timestamp: TruncatedTimestamp) -> Self {
838 838 Self {
839 839 truncated_seconds: timestamp.truncated_seconds().into(),
840 840 nanoseconds: timestamp.nanoseconds().into(),
841 841 }
842 842 }
843 843 }
844 844
845 845 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
846 846 type Error = DirstateV2ParseError;
847 847
848 848 fn try_from(
849 849 timestamp: PackedTruncatedTimestamp,
850 850 ) -> Result<Self, Self::Error> {
851 851 Self::from_already_truncated(
852 852 timestamp.truncated_seconds.get(),
853 853 timestamp.nanoseconds.get(),
854 854 false,
855 855 )
856 856 }
857 857 }
858 858 impl PackedTruncatedTimestamp {
859 859 fn null() -> Self {
860 860 Self {
861 861 truncated_seconds: 0.into(),
862 862 nanoseconds: 0.into(),
863 863 }
864 864 }
865 865 }
@@ -1,195 +1,205 b''
1 1 use crate::errors::{HgError, IoErrorContext, IoResultExt};
2 2 use memmap2::{Mmap, MmapOptions};
3 3 use std::io::{ErrorKind, Write};
4 4 use std::path::{Path, PathBuf};
5 5
/// Filesystem access abstraction for the contents of a given "base"
/// directory
#[derive(Clone, Copy)]
pub struct Vfs<'a> {
    /// Directory that relative paths are joined onto (see `join`).
    pub(crate) base: &'a Path,
}
11 11
/// Outcome of `Vfs::mmap_open_gen` when opening the file failed with
/// `ErrorKind::NotFound`: carries the original I/O error and the full path
/// that was tried.
struct FileNotFound(std::io::Error, PathBuf);
13 13
14 14 impl Vfs<'_> {
15 15 pub fn join(&self, relative_path: impl AsRef<Path>) -> PathBuf {
16 16 self.base.join(relative_path)
17 17 }
18 18
19 19 pub fn symlink_metadata(
20 20 &self,
21 21 relative_path: impl AsRef<Path>,
22 22 ) -> Result<std::fs::Metadata, HgError> {
23 23 let path = self.join(relative_path);
24 24 std::fs::symlink_metadata(&path).when_reading_file(&path)
25 25 }
26 26
27 27 pub fn read_link(
28 28 &self,
29 29 relative_path: impl AsRef<Path>,
30 30 ) -> Result<PathBuf, HgError> {
31 31 let path = self.join(relative_path);
32 32 std::fs::read_link(&path).when_reading_file(&path)
33 33 }
34 34
35 35 pub fn read(
36 36 &self,
37 37 relative_path: impl AsRef<Path>,
38 38 ) -> Result<Vec<u8>, HgError> {
39 39 let path = self.join(relative_path);
40 40 std::fs::read(&path).when_reading_file(&path)
41 41 }
42 42
43 43 /// Returns `Ok(None)` if the file does not exist.
44 44 pub fn try_read(
45 45 &self,
46 46 relative_path: impl AsRef<Path>,
47 47 ) -> Result<Option<Vec<u8>>, HgError> {
48 48 match self.read(relative_path) {
49 49 Err(e) => match &e {
50 50 HgError::IoError { error, .. } => match error.kind() {
51 51 ErrorKind::NotFound => Ok(None),
52 52 _ => Err(e),
53 53 },
54 54 _ => Err(e),
55 55 },
56 56 Ok(v) => Ok(Some(v)),
57 57 }
58 58 }
59 59
60 60 fn mmap_open_gen(
61 61 &self,
62 62 relative_path: impl AsRef<Path>,
63 63 ) -> Result<Result<Mmap, FileNotFound>, HgError> {
64 64 let path = self.join(relative_path);
65 65 let file = match std::fs::File::open(&path) {
66 66 Err(err) => {
67 67 if let ErrorKind::NotFound = err.kind() {
68 68 return Ok(Err(FileNotFound(err, path)));
69 69 };
70 70 return (Err(err)).when_reading_file(&path);
71 71 }
72 72 Ok(file) => file,
73 73 };
74 74 // TODO: what are the safety requirements here?
75 75 let mmap = unsafe { MmapOptions::new().map(&file) }
76 76 .when_reading_file(&path)?;
77 77 Ok(Ok(mmap))
78 78 }
79 79
80 80 pub fn mmap_open_opt(
81 81 &self,
82 82 relative_path: impl AsRef<Path>,
83 83 ) -> Result<Option<Mmap>, HgError> {
84 84 self.mmap_open_gen(relative_path).map(|res| res.ok())
85 85 }
86 86
87 87 pub fn mmap_open(
88 88 &self,
89 89 relative_path: impl AsRef<Path>,
90 90 ) -> Result<Mmap, HgError> {
91 91 match self.mmap_open_gen(relative_path)? {
92 92 Err(FileNotFound(err, path)) => Err(err).when_reading_file(&path),
93 93 Ok(res) => Ok(res),
94 94 }
95 95 }
96 96
97 97 pub fn rename(
98 98 &self,
99 99 relative_from: impl AsRef<Path>,
100 100 relative_to: impl AsRef<Path>,
101 101 ) -> Result<(), HgError> {
102 102 let from = self.join(relative_from);
103 103 let to = self.join(relative_to);
104 104 std::fs::rename(&from, &to)
105 105 .with_context(|| IoErrorContext::RenamingFile { from, to })
106 106 }
107 107
108 108 pub fn remove_file(
109 109 &self,
110 110 relative_path: impl AsRef<Path>,
111 111 ) -> Result<(), HgError> {
112 112 let path = self.join(relative_path);
113 113 std::fs::remove_file(&path)
114 114 .with_context(|| IoErrorContext::RemovingFile(path))
115 115 }
116 116
117 117 #[cfg(unix)]
118 118 pub fn create_symlink(
119 119 &self,
120 120 relative_link_path: impl AsRef<Path>,
121 121 target_path: impl AsRef<Path>,
122 122 ) -> Result<(), HgError> {
123 123 let link_path = self.join(relative_link_path);
124 124 std::os::unix::fs::symlink(target_path, &link_path)
125 125 .when_writing_file(&link_path)
126 126 }
127 127
128 128 /// Write `contents` into a temporary file, then rename to `relative_path`.
129 129 /// This makes writing to a file "atomic": a reader opening that path will
130 130 /// see either the previous contents of the file or the complete new
131 131 /// content, never a partial write.
132 132 pub fn atomic_write(
133 133 &self,
134 134 relative_path: impl AsRef<Path>,
135 135 contents: &[u8],
136 136 ) -> Result<(), HgError> {
137 137 let mut tmp = tempfile::NamedTempFile::new_in(self.base)
138 138 .when_writing_file(self.base)?;
139 139 tmp.write_all(contents)
140 140 .and_then(|()| tmp.flush())
141 141 .when_writing_file(tmp.path())?;
142 142 let path = self.join(relative_path);
143 143 tmp.persist(&path)
144 144 .map_err(|e| e.error)
145 145 .when_writing_file(&path)?;
146 146 Ok(())
147 147 }
148 148 }
149 149
150 150 fn fs_metadata(
151 151 path: impl AsRef<Path>,
152 152 ) -> Result<Option<std::fs::Metadata>, HgError> {
153 153 let path = path.as_ref();
154 154 match std::fs::metadata(path) {
155 155 Ok(meta) => Ok(Some(meta)),
156 156 Err(error) => match error.kind() {
157 157 // TODO: when we require a Rust version where `NotADirectory` is
158 158 // stable, invert this logic and return None for it and `NotFound`
159 159 // and propagate any other error.
160 160 ErrorKind::PermissionDenied => Err(error).with_context(|| {
161 161 IoErrorContext::ReadingMetadata(path.to_owned())
162 162 }),
163 163 _ => Ok(None),
164 164 },
165 165 }
166 166 }
167 167
168 168 pub(crate) fn is_dir(path: impl AsRef<Path>) -> Result<bool, HgError> {
169 169 Ok(fs_metadata(path)?.map_or(false, |meta| meta.is_dir()))
170 170 }
171 171
172 172 pub(crate) fn is_file(path: impl AsRef<Path>) -> Result<bool, HgError> {
173 173 Ok(fs_metadata(path)?.map_or(false, |meta| meta.is_file()))
174 174 }
175 175
176 176 /// Returns whether the given `path` is on a network file system.
177 177 /// Taken from `cargo`'s codebase.
178 178 #[cfg(target_os = "linux")]
179 179 pub(crate) fn is_on_nfs_mount(path: impl AsRef<Path>) -> bool {
180 180 use std::ffi::CString;
181 181 use std::mem;
182 182 use std::os::unix::prelude::*;
183 183
184 184 let path = match CString::new(path.as_ref().as_os_str().as_bytes()) {
185 185 Ok(path) => path,
186 186 Err(_) => return false,
187 187 };
188 188
189 189 unsafe {
190 190 let mut buf: libc::statfs = mem::zeroed();
191 191 let r = libc::statfs(path.as_ptr(), &mut buf);
192 192
193 193 r == 0 && buf.f_type as u32 == libc::NFS_SUPER_MAGIC as u32
194 194 }
195 195 }
196
/// Fallback for every target other than Linux: always reports that the
/// path is not on a network file system.
///
/// This is similar to what Cargo does. Detecting NFS (or other non-local
/// file systems) _should_ be possible on other operating systems, but
/// for now we simply assume that mmap() works there: _some_
/// functionality is better than a compile error, i.e. none at all.
#[cfg(not(target_os = "linux"))]
pub(crate) fn is_on_nfs_mount(_path: impl AsRef<Path>) -> bool {
    false
}
General Comments 0
You need to be logged in to leave comments. Login now