##// END OF EJS Templates
rust-clippy: ignore clippy's recommendation for "useless" cast...
Raphaël Gomès -
r52012:d58e754f default
parent child Browse files
Show More
@@ -1,865 +1,869 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 7 use crate::dirstate_tree::dirstate_map::{
8 8 self, DirstateMap, DirstateMapWriteMode, NodeRef,
9 9 };
10 10 use crate::dirstate_tree::path_with_basename::WithBasename;
11 11 use crate::errors::HgError;
12 12 use crate::utils::hg_path::HgPath;
13 13 use crate::DirstateEntry;
14 14 use crate::DirstateError;
15 15 use crate::DirstateParents;
16 16 use bitflags::bitflags;
17 17 use bytes_cast::unaligned::{U16Be, U32Be};
18 18 use bytes_cast::BytesCast;
19 19 use format_bytes::format_bytes;
20 20 use rand::Rng;
21 21 use std::borrow::Cow;
22 22 use std::fmt::Write;
23 23
24 24 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
25 25 /// This is a redundant sanity check more than an actual "magic number" since
26 26 /// `.hg/requires` already governs which format should be used.
27 27 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
28 28
29 29 /// Keep space for 256-bit hashes
30 30 const STORED_NODE_ID_BYTES: usize = 32;
31 31
32 32 /// … even though only 160 bits are used for now, with SHA-1
33 33 const USED_NODE_ID_BYTES: usize = 20;
34 34
35 35 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
36 36 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
37 37
38 38 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
39 39 const TREE_METADATA_SIZE: usize = 44;
40 40 const NODE_SIZE: usize = 44;
41 41
42 42 /// Make sure that size-affecting changes are made knowingly
43 43 #[allow(unused)]
44 44 fn static_assert_size_of() {
45 45 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
46 46 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
47 47 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
48 48 }
49 49
50 50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
51 51 #[derive(BytesCast)]
52 52 #[repr(C)]
53 53 struct DocketHeader {
54 54 marker: [u8; V2_FORMAT_MARKER.len()],
55 55 parent_1: [u8; STORED_NODE_ID_BYTES],
56 56 parent_2: [u8; STORED_NODE_ID_BYTES],
57 57
58 58 metadata: TreeMetadata,
59 59
60 60 /// Counted in bytes
61 61 data_size: Size,
62 62
63 63 uuid_size: u8,
64 64 }
65 65
66 66 pub struct Docket<'on_disk> {
67 67 header: &'on_disk DocketHeader,
68 68 pub uuid: &'on_disk [u8],
69 69 }
70 70
71 71 /// Fields are documented in the *Tree metadata in the docket file*
72 72 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
73 73 #[derive(BytesCast)]
74 74 #[repr(C)]
75 75 pub struct TreeMetadata {
76 76 root_nodes: ChildNodes,
77 77 nodes_with_entry_count: Size,
78 78 nodes_with_copy_source_count: Size,
79 79 unreachable_bytes: Size,
80 80 unused: [u8; 4],
81 81
82 82 /// See *Optional hash of ignore patterns* section of
83 83 /// `mercurial/helptext/internals/dirstate-v2.txt`
84 84 ignore_patterns_hash: IgnorePatternsHash,
85 85 }
86 86
87 87 /// Fields are documented in the *The data file format*
88 88 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
89 89 #[derive(BytesCast, Debug)]
90 90 #[repr(C)]
91 91 pub(super) struct Node {
92 92 full_path: PathSlice,
93 93
94 94 /// In bytes from `self.full_path.start`
95 95 base_name_start: PathSize,
96 96
97 97 copy_source: OptPathSlice,
98 98 children: ChildNodes,
99 99 pub(super) descendants_with_entry_count: Size,
100 100 pub(super) tracked_descendants_count: Size,
101 101 flags: U16Be,
102 102 size: U32Be,
103 103 mtime: PackedTruncatedTimestamp,
104 104 }
105 105
106 106 bitflags! {
107 107 #[repr(C)]
108 108 struct Flags: u16 {
109 109 const WDIR_TRACKED = 1 << 0;
110 110 const P1_TRACKED = 1 << 1;
111 111 const P2_INFO = 1 << 2;
112 112 const MODE_EXEC_PERM = 1 << 3;
113 113 const MODE_IS_SYMLINK = 1 << 4;
114 114 const HAS_FALLBACK_EXEC = 1 << 5;
115 115 const FALLBACK_EXEC = 1 << 6;
116 116 const HAS_FALLBACK_SYMLINK = 1 << 7;
117 117 const FALLBACK_SYMLINK = 1 << 8;
118 118 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
119 119 const HAS_MODE_AND_SIZE = 1 <<10;
120 120 const HAS_MTIME = 1 <<11;
121 121 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
122 122 const DIRECTORY = 1 <<13;
123 123 const ALL_UNKNOWN_RECORDED = 1 <<14;
124 124 const ALL_IGNORED_RECORDED = 1 <<15;
125 125 }
126 126 }
127 127
128 128 /// Duration since the Unix epoch
129 129 #[derive(BytesCast, Copy, Clone, Debug)]
130 130 #[repr(C)]
131 131 struct PackedTruncatedTimestamp {
132 132 truncated_seconds: U32Be,
133 133 nanoseconds: U32Be,
134 134 }
135 135
136 136 /// Counted in bytes from the start of the file
137 137 ///
138 138 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
139 139 type Offset = U32Be;
140 140
141 141 /// Counted in number of items
142 142 ///
143 143 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
144 144 type Size = U32Be;
145 145
146 146 /// Counted in bytes
147 147 ///
148 148 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
149 149 type PathSize = U16Be;
150 150
151 151 /// A contiguous sequence of `len` times `Node`, representing the child nodes
152 152 /// of either some other node or of the repository root.
153 153 ///
154 154 /// Always sorted by ascending `full_path`, to allow binary search.
155 155 /// Since nodes with the same parent nodes also have the same parent path,
156 156 /// only the `base_name`s need to be compared during binary search.
157 157 #[derive(BytesCast, Copy, Clone, Debug)]
158 158 #[repr(C)]
159 159 struct ChildNodes {
160 160 start: Offset,
161 161 len: Size,
162 162 }
163 163
164 164 /// A `HgPath` of `len` bytes
165 165 #[derive(BytesCast, Copy, Clone, Debug)]
166 166 #[repr(C)]
167 167 struct PathSlice {
168 168 start: Offset,
169 169 len: PathSize,
170 170 }
171 171
172 172 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
173 173 type OptPathSlice = PathSlice;
174 174
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    /// Human-readable description of what went wrong
    message: String,
}

impl DirstateV2ParseError {
    /// Build an error from anything that converts into a `String`.
    pub fn new<S>(message: S) -> Self
    where
        S: Into<String>,
    {
        let message = message.into();
        Self { message }
    }
}
190 190
191 191 impl From<DirstateV2ParseError> for HgError {
192 192 fn from(e: DirstateV2ParseError) -> Self {
193 193 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
194 194 }
195 195 }
196 196
197 197 impl From<DirstateV2ParseError> for crate::DirstateError {
198 198 fn from(error: DirstateV2ParseError) -> Self {
199 199 HgError::from(error).into()
200 200 }
201 201 }
202 202
203 203 impl TreeMetadata {
204 204 pub fn as_bytes(&self) -> &[u8] {
205 205 BytesCast::as_bytes(self)
206 206 }
207 207 }
208 208
209 209 impl<'on_disk> Docket<'on_disk> {
210 210 /// Generate the identifier for a new data file
211 211 ///
212 212 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
213 213 /// See `mercurial/revlogutils/docket.py`
214 214 pub fn new_uid() -> String {
215 215 const ID_LENGTH: usize = 8;
216 216 let mut id = String::with_capacity(ID_LENGTH);
217 217 let mut rng = rand::thread_rng();
218 218 for _ in 0..ID_LENGTH {
219 219 // One random hexadecimal digit.
220 220 // `unwrap` never panics because `impl Write for String`
221 221 // never returns an error.
222 222 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
223 223 }
224 224 id
225 225 }
226 226
227 227 pub fn serialize(
228 228 parents: DirstateParents,
229 229 tree_metadata: TreeMetadata,
230 230 data_size: u64,
231 231 uuid: &[u8],
232 232 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
233 233 let header = DocketHeader {
234 234 marker: *V2_FORMAT_MARKER,
235 235 parent_1: parents.p1.pad_to_256_bits(),
236 236 parent_2: parents.p2.pad_to_256_bits(),
237 237 metadata: tree_metadata,
238 238 data_size: u32::try_from(data_size)?.into(),
239 239 uuid_size: uuid.len().try_into()?,
240 240 };
241 241 let header = header.as_bytes();
242 242 let mut docket = Vec::with_capacity(header.len() + uuid.len());
243 243 docket.extend_from_slice(header);
244 244 docket.extend_from_slice(uuid);
245 245 Ok(docket)
246 246 }
247 247
248 248 pub fn parents(&self) -> DirstateParents {
249 249 use crate::Node;
250 250 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
251 251 .unwrap();
252 252 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
253 253 .unwrap();
254 254 DirstateParents { p1, p2 }
255 255 }
256 256
257 257 pub fn tree_metadata(&self) -> &[u8] {
258 258 self.header.metadata.as_bytes()
259 259 }
260 260
261 261 pub fn data_size(&self) -> usize {
262 262 // This `unwrap` could only panic on a 16-bit CPU
263 263 self.header.data_size.get().try_into().unwrap()
264 264 }
265 265
266 266 pub fn data_filename(&self) -> String {
267 267 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
268 268 }
269 269 }
270 270
271 271 pub fn read_docket(
272 272 on_disk: &[u8],
273 273 ) -> Result<Docket<'_>, DirstateV2ParseError> {
274 274 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
275 275 DirstateV2ParseError::new(format!("when reading docket, {}", e))
276 276 })?;
277 277 let uuid_size = header.uuid_size as usize;
278 278 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
279 279 Ok(Docket { header, uuid })
280 280 } else {
281 281 Err(DirstateV2ParseError::new(
282 282 "invalid format marker or uuid size",
283 283 ))
284 284 }
285 285 }
286 286
287 287 pub(super) fn read<'on_disk>(
288 288 on_disk: &'on_disk [u8],
289 289 metadata: &[u8],
290 290 uuid: Vec<u8>,
291 291 identity: Option<u64>,
292 292 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
293 293 if on_disk.is_empty() {
294 294 let mut map = DirstateMap::empty(on_disk);
295 295 map.dirstate_version = DirstateVersion::V2;
296 296 return Ok(map);
297 297 }
298 298 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
299 299 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
300 300 })?;
301 301 let dirstate_map = DirstateMap {
302 302 on_disk,
303 303 root: dirstate_map::ChildNodes::OnDisk(
304 304 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
305 305 e.message = format!("{}, when reading root notes", e.message);
306 306 e
307 307 })?,
308 308 ),
309 309 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
310 310 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
311 311 ignore_patterns_hash: meta.ignore_patterns_hash,
312 312 unreachable_bytes: meta.unreachable_bytes.get(),
313 313 old_data_size: on_disk.len(),
314 314 old_uuid: Some(uuid),
315 315 identity,
316 316 dirstate_version: DirstateVersion::V2,
317 317 write_mode: DirstateMapWriteMode::Auto,
318 318 };
319 319 Ok(dirstate_map)
320 320 }
321 321
322 322 impl Node {
323 323 pub(super) fn full_path<'on_disk>(
324 324 &self,
325 325 on_disk: &'on_disk [u8],
326 326 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
327 327 read_hg_path(on_disk, self.full_path)
328 328 }
329 329
330 330 pub(super) fn base_name_start(
331 331 &self,
332 332 ) -> Result<usize, DirstateV2ParseError> {
333 333 let start = self.base_name_start.get();
334 334 if start < self.full_path.len.get() {
335 335 let start = usize::try_from(start)
336 336 // u32 -> usize, could only panic on a 16-bit CPU
337 337 .expect("dirstate-v2 base_name_start out of bounds");
338 338 Ok(start)
339 339 } else {
340 340 Err(DirstateV2ParseError::new("not enough bytes for base name"))
341 341 }
342 342 }
343 343
344 344 pub(super) fn base_name<'on_disk>(
345 345 &self,
346 346 on_disk: &'on_disk [u8],
347 347 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
348 348 let full_path = self.full_path(on_disk)?;
349 349 let base_name_start = self.base_name_start()?;
350 350 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
351 351 }
352 352
353 353 pub(super) fn path<'on_disk>(
354 354 &self,
355 355 on_disk: &'on_disk [u8],
356 356 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
357 357 Ok(WithBasename::from_raw_parts(
358 358 Cow::Borrowed(self.full_path(on_disk)?),
359 359 self.base_name_start()?,
360 360 ))
361 361 }
362 362
363 363 pub(super) fn has_copy_source(&self) -> bool {
364 364 self.copy_source.start.get() != 0
365 365 }
366 366
367 367 pub(super) fn copy_source<'on_disk>(
368 368 &self,
369 369 on_disk: &'on_disk [u8],
370 370 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
371 371 Ok(if self.has_copy_source() {
372 372 Some(read_hg_path(on_disk, self.copy_source)?)
373 373 } else {
374 374 None
375 375 })
376 376 }
377 377
378 378 fn flags(&self) -> Flags {
379 379 Flags::from_bits_truncate(self.flags.get())
380 380 }
381 381
382 382 fn has_entry(&self) -> bool {
383 383 self.flags().intersects(
384 384 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
385 385 )
386 386 }
387 387
388 388 pub(super) fn node_data(
389 389 &self,
390 390 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
391 391 if self.has_entry() {
392 392 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
393 393 } else if let Some(mtime) = self.cached_directory_mtime()? {
394 394 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
395 395 } else {
396 396 Ok(dirstate_map::NodeData::None)
397 397 }
398 398 }
399 399
400 400 pub(super) fn cached_directory_mtime(
401 401 &self,
402 402 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
403 403 // For now we do not have code to handle the absence of
404 404 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
405 405 // unset.
406 406 if self.flags().contains(Flags::DIRECTORY)
407 407 && self.flags().contains(Flags::HAS_MTIME)
408 408 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
409 409 {
410 410 Ok(Some(self.mtime()?))
411 411 } else {
412 412 Ok(None)
413 413 }
414 414 }
415 415
416 416 fn synthesize_unix_mode(&self) -> u32 {
417 // Some platforms' libc don't have the same type (MacOS uses i32 here)
418 #[allow(clippy::unnecessary_cast)]
417 419 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
418 420 libc::S_IFLNK as u32
419 421 } else {
420 422 libc::S_IFREG as u32
421 423 };
422 424 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
423 425 0o755
424 426 } else {
425 427 0o644
426 428 };
427 429 file_type | permissions
428 430 }
429 431
430 432 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
431 433 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
432 434 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
433 435 m.second_ambiguous = true;
434 436 }
435 437 Ok(m)
436 438 }
437 439
438 440 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
439 441 // TODO: convert through raw bits instead?
440 442 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
441 443 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
442 444 let p2_info = self.flags().contains(Flags::P2_INFO);
443 445 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
444 446 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
445 447 {
446 448 Some((self.synthesize_unix_mode(), self.size.into()))
447 449 } else {
448 450 None
449 451 };
450 452 let mtime = if self.flags().contains(Flags::HAS_MTIME)
451 453 && !self.flags().contains(Flags::DIRECTORY)
452 454 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
453 455 {
454 456 Some(self.mtime()?)
455 457 } else {
456 458 None
457 459 };
458 460 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
459 461 {
460 462 Some(self.flags().contains(Flags::FALLBACK_EXEC))
461 463 } else {
462 464 None
463 465 };
464 466 let fallback_symlink =
465 467 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
466 468 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
467 469 } else {
468 470 None
469 471 };
470 472 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
471 473 wc_tracked,
472 474 p1_tracked,
473 475 p2_info,
474 476 mode_size,
475 477 mtime,
476 478 fallback_exec,
477 479 fallback_symlink,
478 480 }))
479 481 }
480 482
481 483 pub(super) fn entry(
482 484 &self,
483 485 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
484 486 if self.has_entry() {
485 487 Ok(Some(self.assume_entry()?))
486 488 } else {
487 489 Ok(None)
488 490 }
489 491 }
490 492
491 493 pub(super) fn children<'on_disk>(
492 494 &self,
493 495 on_disk: &'on_disk [u8],
494 496 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
495 497 read_nodes(on_disk, self.children)
496 498 }
497 499
498 500 pub(super) fn to_in_memory_node<'on_disk>(
499 501 &self,
500 502 on_disk: &'on_disk [u8],
501 503 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
502 504 Ok(dirstate_map::Node {
503 505 children: dirstate_map::ChildNodes::OnDisk(
504 506 self.children(on_disk)?,
505 507 ),
506 508 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
507 509 data: self.node_data()?,
508 510 descendants_with_entry_count: self
509 511 .descendants_with_entry_count
510 512 .get(),
511 513 tracked_descendants_count: self.tracked_descendants_count.get(),
512 514 })
513 515 }
514 516
515 517 fn from_dirstate_entry(
516 518 entry: &DirstateEntry,
517 519 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
518 520 let DirstateV2Data {
519 521 wc_tracked,
520 522 p1_tracked,
521 523 p2_info,
522 524 mode_size: mode_size_opt,
523 525 mtime: mtime_opt,
524 526 fallback_exec,
525 527 fallback_symlink,
526 528 } = entry.v2_data();
527 529 // TODO: convert through raw flag bits instead?
528 530 let mut flags = Flags::empty();
529 531 flags.set(Flags::WDIR_TRACKED, wc_tracked);
530 532 flags.set(Flags::P1_TRACKED, p1_tracked);
531 533 flags.set(Flags::P2_INFO, p2_info);
534 // Some platforms' libc don't have the same type (MacOS uses i32 here)
535 #[allow(clippy::unnecessary_cast)]
532 536 let size = if let Some((m, s)) = mode_size_opt {
533 537 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
534 538 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
535 539 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
536 540 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
537 541 flags.insert(Flags::HAS_MODE_AND_SIZE);
538 542 s.into()
539 543 } else {
540 544 0.into()
541 545 };
542 546 let mtime = if let Some(m) = mtime_opt {
543 547 flags.insert(Flags::HAS_MTIME);
544 548 if m.second_ambiguous {
545 549 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
546 550 };
547 551 m.into()
548 552 } else {
549 553 PackedTruncatedTimestamp::null()
550 554 };
551 555 if let Some(f_exec) = fallback_exec {
552 556 flags.insert(Flags::HAS_FALLBACK_EXEC);
553 557 if f_exec {
554 558 flags.insert(Flags::FALLBACK_EXEC);
555 559 }
556 560 }
557 561 if let Some(f_symlink) = fallback_symlink {
558 562 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
559 563 if f_symlink {
560 564 flags.insert(Flags::FALLBACK_SYMLINK);
561 565 }
562 566 }
563 567 (flags, size, mtime)
564 568 }
565 569 }
566 570
567 571 fn read_hg_path(
568 572 on_disk: &[u8],
569 573 slice: PathSlice,
570 574 ) -> Result<&HgPath, DirstateV2ParseError> {
571 575 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
572 576 }
573 577
574 578 fn read_nodes(
575 579 on_disk: &[u8],
576 580 slice: ChildNodes,
577 581 ) -> Result<&[Node], DirstateV2ParseError> {
578 582 read_slice(on_disk, slice.start, slice.len.get())
579 583 }
580 584
581 585 fn read_slice<T, Len>(
582 586 on_disk: &[u8],
583 587 start: Offset,
584 588 len: Len,
585 589 ) -> Result<&[T], DirstateV2ParseError>
586 590 where
587 591 T: BytesCast,
588 592 Len: TryInto<usize>,
589 593 {
590 594 // Either `usize::MAX` would result in "out of bounds" error since a single
591 595 // `&[u8]` cannot occupy the entire addess space.
592 596 let start = start.get().try_into().unwrap_or(std::usize::MAX);
593 597 let len = len.try_into().unwrap_or(std::usize::MAX);
594 598 let bytes = match on_disk.get(start..) {
595 599 Some(bytes) => bytes,
596 600 None => {
597 601 return Err(DirstateV2ParseError::new(
598 602 "not enough bytes from disk",
599 603 ))
600 604 }
601 605 };
602 606 T::slice_from_bytes(bytes, len)
603 607 .map_err(|e| {
604 608 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
605 609 })
606 610 .map(|(slice, _rest)| slice)
607 611 }
608 612
609 613 /// Returns new data and metadata, together with whether that data should be
610 614 /// appended to the existing data file whose content is at
611 615 /// `dirstate_map.on_disk` (true), instead of written to a new data file
612 616 /// (false), and the previous size of data on disk.
613 617 pub(super) fn write(
614 618 dirstate_map: &DirstateMap,
615 619 write_mode: DirstateMapWriteMode,
616 620 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
617 621 let append = match write_mode {
618 622 DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
619 623 DirstateMapWriteMode::ForceNewDataFile => false,
620 624 DirstateMapWriteMode::ForceAppend => true,
621 625 };
622 626 if append {
623 627 log::trace!("appending to the dirstate data file");
624 628 } else {
625 629 log::trace!("creating new dirstate data file");
626 630 }
627 631
628 632 // This ignores the space for paths, and for nodes without an entry.
629 633 // TODO: better estimate? Skip the `Vec` and write to a file directly?
630 634 let size_guess = std::mem::size_of::<Node>()
631 635 * dirstate_map.nodes_with_entry_count as usize;
632 636
633 637 let mut writer = Writer {
634 638 dirstate_map,
635 639 append,
636 640 out: Vec::with_capacity(size_guess),
637 641 };
638 642
639 643 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
640 644
641 645 let unreachable_bytes = if append {
642 646 dirstate_map.unreachable_bytes
643 647 } else {
644 648 0
645 649 };
646 650 let meta = TreeMetadata {
647 651 root_nodes,
648 652 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
649 653 nodes_with_copy_source_count: dirstate_map
650 654 .nodes_with_copy_source_count
651 655 .into(),
652 656 unreachable_bytes: unreachable_bytes.into(),
653 657 unused: [0; 4],
654 658 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
655 659 };
656 660 Ok((writer.out, meta, append, dirstate_map.old_data_size))
657 661 }
658 662
659 663 struct Writer<'dmap, 'on_disk> {
660 664 dirstate_map: &'dmap DirstateMap<'on_disk>,
661 665 append: bool,
662 666 out: Vec<u8>,
663 667 }
664 668
665 669 impl Writer<'_, '_> {
666 670 fn write_nodes(
667 671 &mut self,
668 672 nodes: dirstate_map::ChildNodesRef,
669 673 ) -> Result<ChildNodes, DirstateError> {
670 674 // Reuse already-written nodes if possible
671 675 if self.append {
672 676 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
673 677 let start = self.on_disk_offset_of(nodes_slice).expect(
674 678 "dirstate-v2 OnDisk nodes not found within on_disk",
675 679 );
676 680 let len = child_nodes_len_from_usize(nodes_slice.len());
677 681 return Ok(ChildNodes { start, len });
678 682 }
679 683 }
680 684
681 685 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
682 686 // undefined iteration order. Sort to enable binary search in the
683 687 // written file.
684 688 let nodes = nodes.sorted();
685 689 let nodes_len = nodes.len();
686 690
687 691 // First accumulate serialized nodes in a `Vec`
688 692 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
689 693 for node in nodes {
690 694 let children =
691 695 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
692 696 let full_path = node.full_path(self.dirstate_map.on_disk)?;
693 697 let full_path = self.write_path(full_path.as_bytes());
694 698 let copy_source = if let Some(source) =
695 699 node.copy_source(self.dirstate_map.on_disk)?
696 700 {
697 701 self.write_path(source.as_bytes())
698 702 } else {
699 703 PathSlice {
700 704 start: 0.into(),
701 705 len: 0.into(),
702 706 }
703 707 };
704 708 on_disk_nodes.push(match node {
705 709 NodeRef::InMemory(path, node) => {
706 710 let (flags, size, mtime) = match &node.data {
707 711 dirstate_map::NodeData::Entry(entry) => {
708 712 Node::from_dirstate_entry(entry)
709 713 }
710 714 dirstate_map::NodeData::CachedDirectory { mtime } => {
711 715 // we currently never set a mtime if unknown file
712 716 // are present.
713 717 // So if we have a mtime for a directory, we know
714 718 // they are no unknown
715 719 // files and we
716 720 // blindly set ALL_UNKNOWN_RECORDED.
717 721 //
718 722 // We never set ALL_IGNORED_RECORDED since we
719 723 // don't track that case
720 724 // currently.
721 725 let mut flags = Flags::DIRECTORY
722 726 | Flags::HAS_MTIME
723 727 | Flags::ALL_UNKNOWN_RECORDED;
724 728 if mtime.second_ambiguous {
725 729 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
726 730 }
727 731 (flags, 0.into(), (*mtime).into())
728 732 }
729 733 dirstate_map::NodeData::None => (
730 734 Flags::DIRECTORY,
731 735 0.into(),
732 736 PackedTruncatedTimestamp::null(),
733 737 ),
734 738 };
735 739 Node {
736 740 children,
737 741 copy_source,
738 742 full_path,
739 743 base_name_start: u16::try_from(path.base_name_start())
740 744 // Could only panic for paths over 64 KiB
741 745 .expect("dirstate-v2 path length overflow")
742 746 .into(),
743 747 descendants_with_entry_count: node
744 748 .descendants_with_entry_count
745 749 .into(),
746 750 tracked_descendants_count: node
747 751 .tracked_descendants_count
748 752 .into(),
749 753 flags: flags.bits().into(),
750 754 size,
751 755 mtime,
752 756 }
753 757 }
754 758 NodeRef::OnDisk(node) => Node {
755 759 children,
756 760 copy_source,
757 761 full_path,
758 762 ..*node
759 763 },
760 764 })
761 765 }
762 766 // … so we can write them contiguously, after writing everything else
763 767 // they refer to.
764 768 let start = self.current_offset();
765 769 let len = child_nodes_len_from_usize(nodes_len);
766 770 self.out.extend(on_disk_nodes.as_bytes());
767 771 Ok(ChildNodes { start, len })
768 772 }
769 773
770 774 /// If the given slice of items is within `on_disk`, returns its offset
771 775 /// from the start of `on_disk`.
772 776 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
773 777 where
774 778 T: BytesCast,
775 779 {
776 780 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
777 781 let start = slice.as_ptr() as usize;
778 782 let end = start + slice.len();
779 783 start..=end
780 784 }
781 785 let slice_addresses = address_range(slice.as_bytes());
782 786 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
783 787 if on_disk_addresses.contains(slice_addresses.start())
784 788 && on_disk_addresses.contains(slice_addresses.end())
785 789 {
786 790 let offset = slice_addresses.start() - on_disk_addresses.start();
787 791 Some(offset_from_usize(offset))
788 792 } else {
789 793 None
790 794 }
791 795 }
792 796
793 797 fn current_offset(&mut self) -> Offset {
794 798 let mut offset = self.out.len();
795 799 if self.append {
796 800 offset += self.dirstate_map.on_disk.len()
797 801 }
798 802 offset_from_usize(offset)
799 803 }
800 804
801 805 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
802 806 let len = path_len_from_usize(slice.len());
803 807 // Reuse an already-written path if possible
804 808 if self.append {
805 809 if let Some(start) = self.on_disk_offset_of(slice) {
806 810 return PathSlice { start, len };
807 811 }
808 812 }
809 813 let start = self.current_offset();
810 814 self.out.extend(slice.as_bytes());
811 815 PathSlice { start, len }
812 816 }
813 817 }
814 818
815 819 fn offset_from_usize(x: usize) -> Offset {
816 820 u32::try_from(x)
817 821 // Could only panic for a dirstate file larger than 4 GiB
818 822 .expect("dirstate-v2 offset overflow")
819 823 .into()
820 824 }
821 825
822 826 fn child_nodes_len_from_usize(x: usize) -> Size {
823 827 u32::try_from(x)
824 828 // Could only panic with over 4 billion nodes
825 829 .expect("dirstate-v2 slice length overflow")
826 830 .into()
827 831 }
828 832
829 833 fn path_len_from_usize(x: usize) -> PathSize {
830 834 u16::try_from(x)
831 835 // Could only panic for paths over 64 KiB
832 836 .expect("dirstate-v2 path length overflow")
833 837 .into()
834 838 }
835 839
836 840 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
837 841 fn from(timestamp: TruncatedTimestamp) -> Self {
838 842 Self {
839 843 truncated_seconds: timestamp.truncated_seconds().into(),
840 844 nanoseconds: timestamp.nanoseconds().into(),
841 845 }
842 846 }
843 847 }
844 848
845 849 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
846 850 type Error = DirstateV2ParseError;
847 851
848 852 fn try_from(
849 853 timestamp: PackedTruncatedTimestamp,
850 854 ) -> Result<Self, Self::Error> {
851 855 Self::from_already_truncated(
852 856 timestamp.truncated_seconds.get(),
853 857 timestamp.nanoseconds.get(),
854 858 false,
855 859 )
856 860 }
857 861 }
858 862 impl PackedTruncatedTimestamp {
859 863 fn null() -> Self {
860 864 Self {
861 865 truncated_seconds: 0.into(),
862 866 nanoseconds: 0.into(),
863 867 }
864 868 }
865 869 }
General Comments 0
You need to be logged in to leave comments. Login now