##// END OF EJS Templates
rust: fix build errors on darwin...
Dan Villiom Podlaski Christiansen -
r49356:d6c53b40 default
parent child Browse files
Show More
@@ -1,843 +1,843
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::TruncatedTimestamp;
6 6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 8 use crate::errors::HgError;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::DirstateEntry;
11 11 use crate::DirstateError;
12 12 use crate::DirstateParents;
13 13 use bitflags::bitflags;
14 14 use bytes_cast::unaligned::{U16Be, U32Be};
15 15 use bytes_cast::BytesCast;
16 16 use format_bytes::format_bytes;
17 17 use rand::Rng;
18 18 use std::borrow::Cow;
19 19 use std::convert::{TryFrom, TryInto};
20 20 use std::fmt::Write;
21 21
22 22 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
23 23 /// This is a redundant sanity check more than an actual "magic number" since
24 24 /// `.hg/requires` already governs which format should be used.
25 25 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
26 26
27 27 /// Keep space for 256-bit hashes
28 28 const STORED_NODE_ID_BYTES: usize = 32;
29 29
30 30 /// … even though only 160 bits are used for now, with SHA-1
31 31 const USED_NODE_ID_BYTES: usize = 20;
32 32
33 33 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
34 34 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
35 35
36 36 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
37 37 const TREE_METADATA_SIZE: usize = 44;
38 38 const NODE_SIZE: usize = 44;
39 39
40 40 /// Make sure that size-affecting changes are made knowingly
41 41 #[allow(unused)]
42 42 fn static_assert_size_of() {
43 43 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
44 44 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
45 45 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
46 46 }
47 47
48 48 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
/// Fixed-size prefix of the docket file. `read_docket` casts the start of
/// the file to this struct and treats the bytes that follow as the data file
/// identifier, whose length must equal `uuid_size`.
49 49 #[derive(BytesCast)]
50 50 #[repr(C)]
51 51 struct DocketHeader {
/// Compared against `V2_FORMAT_MARKER` when reading
52 52 marker: [u8; V2_FORMAT_MARKER.len()],
/// Only the first `USED_NODE_ID_BYTES` bytes are read back by `parents`
53 53 parent_1: [u8; STORED_NODE_ID_BYTES],
/// Only the first `USED_NODE_ID_BYTES` bytes are read back by `parents`
54 54 parent_2: [u8; STORED_NODE_ID_BYTES],
55 55
56 56 metadata: TreeMetadata,
57 57
58 58 /// Counted in bytes
59 59 data_size: Size,
60 60
/// Length in bytes of the identifier that follows this header
61 61 uuid_size: u8,
62 62 }
63 63
/// Borrowed view of a parsed docket, as produced by `read_docket`.
64 64 pub struct Docket<'on_disk> {
/// Fixed-size header, borrowed from the parsed bytes
65 65 header: &'on_disk DocketHeader,
/// Bytes following the header; `data_filename` appends them to `dirstate.`
66 66 pub uuid: &'on_disk [u8],
67 67 }
68 68
69 69 /// Fields are documented in the *Tree metadata in the docket file*
70 70 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
71 71 #[derive(BytesCast)]
72 72 #[repr(C)]
73 73 pub struct TreeMetadata {
/// Where the top-level nodes are found in the data file
74 74 root_nodes: ChildNodes,
75 75 nodes_with_entry_count: Size,
76 76 nodes_with_copy_source_count: Size,
77 77 unreachable_bytes: Size,
/// Always written as zeroes by `write`
78 78 unused: [u8; 4],
79 79
80 80 /// See *Optional hash of ignore patterns* section of
81 81 /// `mercurial/helptext/internals/dirstate-v2.txt`
82 82 ignore_patterns_hash: IgnorePatternsHash,
83 83 }
84 84
85 85 /// Fields are documented in the *The data file format*
86 86 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
87 87 #[derive(BytesCast)]
88 88 #[repr(C)]
89 89 pub(super) struct Node {
90 90 full_path: PathSlice,
91 91
92 92 /// In bytes from `self.full_path.start`
93 93 base_name_start: PathSize,
94 94
/// No copy source when `start` is zero (see `has_copy_source`)
95 95 copy_source: OptPathSlice,
96 96 children: ChildNodes,
97 97 pub(super) descendants_with_entry_count: Size,
98 98 pub(super) tracked_descendants_count: Size,
/// Raw bits, decoded through `Flags::from_bits_truncate`
99 99 flags: U16Be,
/// Only read when `Flags::HAS_MODE_AND_SIZE` is set
100 100 size: U32Be,
/// Only read when `Flags::HAS_MTIME` is set
101 101 mtime: PackedTruncatedTimestamp,
102 102 }
103 103
104 104 bitflags! {
/// Interpretation of the 16 bits stored in `Node::flags`.
///
/// Any of `WDIR_TRACKED`, `P1_TRACKED` or `P2_INFO` marks a node that has
/// a dirstate entry (see `Node::has_entry`).
105 105 #[repr(C)]
106 106 struct Flags: u16 {
107 107 const WDIR_TRACKED = 1 << 0;
108 108 const P1_TRACKED = 1 << 1;
109 109 const P2_INFO = 1 << 2;
110 110 const MODE_EXEC_PERM = 1 << 3;
111 111 const MODE_IS_SYMLINK = 1 << 4;
112 112 const HAS_FALLBACK_EXEC = 1 << 5;
113 113 const FALLBACK_EXEC = 1 << 6;
114 114 const HAS_FALLBACK_SYMLINK = 1 << 7;
115 115 const FALLBACK_SYMLINK = 1 << 8;
116 116 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
117 117 const HAS_MODE_AND_SIZE = 1 <<10;
118 118 const HAS_MTIME = 1 <<11;
119 119 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
120 120 const DIRECTORY = 1 <<13;
121 121 const ALL_UNKNOWN_RECORDED = 1 <<14;
122 122 const ALL_IGNORED_RECORDED = 1 <<15;
123 123 }
124 124 }
125 125
126 126 /// Duration since the Unix epoch
///
/// On-disk counterpart of `TruncatedTimestamp`. The "second ambiguous" bit
/// is not stored here but in `Flags::MTIME_SECOND_AMBIGUOUS`.
127 127 #[derive(BytesCast, Copy, Clone)]
128 128 #[repr(C)]
129 129 struct PackedTruncatedTimestamp {
130 130 truncated_seconds: U32Be,
131 131 nanoseconds: U32Be,
132 132 }
133 133
134 134 /// Counted in bytes from the start of the file
135 135 ///
136 136 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
137 137 type Offset = U32Be;
138 138
139 139 /// Counted in number of items
140 140 ///
141 141 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
142 142 type Size = U32Be;
143 143
144 144 /// Counted in bytes
145 145 ///
146 146 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
147 147 type PathSize = U16Be;
148 148
149 149 /// A contiguous sequence of `len` times `Node`, representing the child nodes
150 150 /// of either some other node or of the repository root.
151 151 ///
152 152 /// Always sorted by ascending `full_path`, to allow binary search.
153 153 /// Since nodes with the same parent nodes also have the same parent path,
154 154 /// only the `base_name`s need to be compared during binary search.
155 155 #[derive(BytesCast, Copy, Clone)]
156 156 #[repr(C)]
157 157 struct ChildNodes {
/// Byte offset of the first node
158 158 start: Offset,
/// Number of nodes, not bytes
159 159 len: Size,
160 160 }
161 161
162 162 /// A `HgPath` of `len` bytes
163 163 #[derive(BytesCast, Copy, Clone)]
164 164 #[repr(C)]
165 165 struct PathSlice {
/// Byte offset of the first byte of the path
166 166 start: Offset,
/// Length of the path in bytes
167 167 len: PathSize,
168 168 }
169 169
170 170 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
171 171 type OptPathSlice = PathSlice;
172 172
173 173 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
174 174 ///
175 175 /// This should only happen if Mercurial is buggy or a repository is corrupted.
///
/// This is a unit struct: it carries no detail about what failed to parse.
176 176 #[derive(Debug)]
177 177 pub struct DirstateV2ParseError;
178 178
179 179 impl From<DirstateV2ParseError> for HgError {
180 180 fn from(_: DirstateV2ParseError) -> Self {
181 181 HgError::corrupted("dirstate-v2 parse error")
182 182 }
183 183 }
184 184
185 185 impl From<DirstateV2ParseError> for crate::DirstateError {
186 186 fn from(error: DirstateV2ParseError) -> Self {
187 187 HgError::from(error).into()
188 188 }
189 189 }
190 190
191 191 impl TreeMetadata {
192 192 pub fn as_bytes(&self) -> &[u8] {
193 193 BytesCast::as_bytes(self)
194 194 }
195 195 }
196 196
197 197 impl<'on_disk> Docket<'on_disk> {
198 198 /// Generate the identifier for a new data file
199 199 ///
200 200 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
201 201 /// See `mercurial/revlogutils/docket.py`
202 202 pub fn new_uid() -> String {
203 203 const ID_LENGTH: usize = 8;
204 204 let mut id = String::with_capacity(ID_LENGTH);
205 205 let mut rng = rand::thread_rng();
206 206 for _ in 0..ID_LENGTH {
207 207 // One random hexadecimal digit.
208 208 // `unwrap` never panics because `impl Write for String`
209 209 // never returns an error.
210 210 write!(&mut id, "{:x}", rng.gen_range(0, 16)).unwrap();
211 211 }
212 212 id
213 213 }
214 214
215 215 pub fn serialize(
216 216 parents: DirstateParents,
217 217 tree_metadata: TreeMetadata,
218 218 data_size: u64,
219 219 uuid: &[u8],
220 220 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
221 221 let header = DocketHeader {
222 222 marker: *V2_FORMAT_MARKER,
223 223 parent_1: parents.p1.pad_to_256_bits(),
224 224 parent_2: parents.p2.pad_to_256_bits(),
225 225 metadata: tree_metadata,
226 226 data_size: u32::try_from(data_size)?.into(),
227 227 uuid_size: uuid.len().try_into()?,
228 228 };
229 229 let header = header.as_bytes();
230 230 let mut docket = Vec::with_capacity(header.len() + uuid.len());
231 231 docket.extend_from_slice(header);
232 232 docket.extend_from_slice(uuid);
233 233 Ok(docket)
234 234 }
235 235
236 236 pub fn parents(&self) -> DirstateParents {
237 237 use crate::Node;
238 238 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
239 239 .unwrap()
240 240 .clone();
241 241 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
242 242 .unwrap()
243 243 .clone();
244 244 DirstateParents { p1, p2 }
245 245 }
246 246
247 247 pub fn tree_metadata(&self) -> &[u8] {
248 248 self.header.metadata.as_bytes()
249 249 }
250 250
251 251 pub fn data_size(&self) -> usize {
252 252 // This `unwrap` could only panic on a 16-bit CPU
253 253 self.header.data_size.get().try_into().unwrap()
254 254 }
255 255
256 256 pub fn data_filename(&self) -> String {
257 257 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
258 258 }
259 259 }
260 260
261 261 pub fn read_docket(
262 262 on_disk: &[u8],
263 263 ) -> Result<Docket<'_>, DirstateV2ParseError> {
264 264 let (header, uuid) =
265 265 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
266 266 let uuid_size = header.uuid_size as usize;
267 267 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
268 268 Ok(Docket { header, uuid })
269 269 } else {
270 270 Err(DirstateV2ParseError)
271 271 }
272 272 }
273 273
274 274 pub(super) fn read<'on_disk>(
275 275 on_disk: &'on_disk [u8],
276 276 metadata: &[u8],
277 277 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
278 278 if on_disk.is_empty() {
279 279 return Ok(DirstateMap::empty(on_disk));
280 280 }
281 281 let (meta, _) = TreeMetadata::from_bytes(metadata)
282 282 .map_err(|_| DirstateV2ParseError)?;
283 283 let dirstate_map = DirstateMap {
284 284 on_disk,
285 285 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
286 286 on_disk,
287 287 meta.root_nodes,
288 288 )?),
289 289 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
290 290 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
291 291 ignore_patterns_hash: meta.ignore_patterns_hash,
292 292 unreachable_bytes: meta.unreachable_bytes.get(),
293 293 };
294 294 Ok(dirstate_map)
295 295 }
296 296
297 297 impl Node {
298 298 pub(super) fn full_path<'on_disk>(
299 299 &self,
300 300 on_disk: &'on_disk [u8],
301 301 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
302 302 read_hg_path(on_disk, self.full_path)
303 303 }
304 304
305 305 pub(super) fn base_name_start<'on_disk>(
306 306 &self,
307 307 ) -> Result<usize, DirstateV2ParseError> {
308 308 let start = self.base_name_start.get();
309 309 if start < self.full_path.len.get() {
310 310 let start = usize::try_from(start)
311 311 // u32 -> usize, could only panic on a 16-bit CPU
312 312 .expect("dirstate-v2 base_name_start out of bounds");
313 313 Ok(start)
314 314 } else {
315 315 Err(DirstateV2ParseError)
316 316 }
317 317 }
318 318
319 319 pub(super) fn base_name<'on_disk>(
320 320 &self,
321 321 on_disk: &'on_disk [u8],
322 322 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
323 323 let full_path = self.full_path(on_disk)?;
324 324 let base_name_start = self.base_name_start()?;
325 325 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
326 326 }
327 327
328 328 pub(super) fn path<'on_disk>(
329 329 &self,
330 330 on_disk: &'on_disk [u8],
331 331 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
332 332 Ok(WithBasename::from_raw_parts(
333 333 Cow::Borrowed(self.full_path(on_disk)?),
334 334 self.base_name_start()?,
335 335 ))
336 336 }
337 337
338 338 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
339 339 self.copy_source.start.get() != 0
340 340 }
341 341
342 342 pub(super) fn copy_source<'on_disk>(
343 343 &self,
344 344 on_disk: &'on_disk [u8],
345 345 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
346 346 Ok(if self.has_copy_source() {
347 347 Some(read_hg_path(on_disk, self.copy_source)?)
348 348 } else {
349 349 None
350 350 })
351 351 }
352 352
353 353 fn flags(&self) -> Flags {
354 354 Flags::from_bits_truncate(self.flags.get())
355 355 }
356 356
357 357 fn has_entry(&self) -> bool {
358 358 self.flags().intersects(
359 359 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
360 360 )
361 361 }
362 362
363 363 pub(super) fn node_data(
364 364 &self,
365 365 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
366 366 if self.has_entry() {
367 367 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
368 368 } else if let Some(mtime) = self.cached_directory_mtime()? {
369 369 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
370 370 } else {
371 371 Ok(dirstate_map::NodeData::None)
372 372 }
373 373 }
374 374
375 375 pub(super) fn cached_directory_mtime(
376 376 &self,
377 377 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
378 378 // For now we do not have code to handle the absence of
379 379 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
380 380 // unset.
381 381 if self.flags().contains(Flags::DIRECTORY)
382 382 && self.flags().contains(Flags::HAS_MTIME)
383 383 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
384 384 {
385 385 Ok(Some(self.mtime()?))
386 386 } else {
387 387 Ok(None)
388 388 }
389 389 }
390 390
391 391 fn synthesize_unix_mode(&self) -> u32 {
392 392 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
393 393 libc::S_IFLNK
394 394 } else {
395 395 libc::S_IFREG
396 396 };
397 397 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
398 398 0o755
399 399 } else {
400 400 0o644
401 401 };
402 file_type | permisions
402 (file_type | permisions).into()
403 403 }
404 404
405 405 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
406 406 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
407 407 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
408 408 m.second_ambiguous = true;
409 409 }
410 410 Ok(m)
411 411 }
412 412
413 413 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
414 414 // TODO: convert through raw bits instead?
415 415 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
416 416 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
417 417 let p2_info = self.flags().contains(Flags::P2_INFO);
418 418 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
419 419 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
420 420 {
421 421 Some((self.synthesize_unix_mode(), self.size.into()))
422 422 } else {
423 423 None
424 424 };
425 425 let mtime = if self.flags().contains(Flags::HAS_MTIME)
426 426 && !self.flags().contains(Flags::DIRECTORY)
427 427 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
428 428 {
429 429 Some(self.mtime()?)
430 430 } else {
431 431 None
432 432 };
433 433 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
434 434 {
435 435 Some(self.flags().contains(Flags::FALLBACK_EXEC))
436 436 } else {
437 437 None
438 438 };
439 439 let fallback_symlink =
440 440 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
441 441 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
442 442 } else {
443 443 None
444 444 };
445 445 Ok(DirstateEntry::from_v2_data(
446 446 wdir_tracked,
447 447 p1_tracked,
448 448 p2_info,
449 449 mode_size,
450 450 mtime,
451 451 fallback_exec,
452 452 fallback_symlink,
453 453 ))
454 454 }
455 455
456 456 pub(super) fn entry(
457 457 &self,
458 458 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
459 459 if self.has_entry() {
460 460 Ok(Some(self.assume_entry()?))
461 461 } else {
462 462 Ok(None)
463 463 }
464 464 }
465 465
466 466 pub(super) fn children<'on_disk>(
467 467 &self,
468 468 on_disk: &'on_disk [u8],
469 469 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
470 470 read_nodes(on_disk, self.children)
471 471 }
472 472
473 473 pub(super) fn to_in_memory_node<'on_disk>(
474 474 &self,
475 475 on_disk: &'on_disk [u8],
476 476 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
477 477 Ok(dirstate_map::Node {
478 478 children: dirstate_map::ChildNodes::OnDisk(
479 479 self.children(on_disk)?,
480 480 ),
481 481 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
482 482 data: self.node_data()?,
483 483 descendants_with_entry_count: self
484 484 .descendants_with_entry_count
485 485 .get(),
486 486 tracked_descendants_count: self.tracked_descendants_count.get(),
487 487 })
488 488 }
489 489
490 490 fn from_dirstate_entry(
491 491 entry: &DirstateEntry,
492 492 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
493 493 let (
494 494 wdir_tracked,
495 495 p1_tracked,
496 496 p2_info,
497 497 mode_size_opt,
498 498 mtime_opt,
499 499 fallback_exec,
500 500 fallback_symlink,
501 501 ) = entry.v2_data();
502 502 // TODO: convert throug raw flag bits instead?
503 503 let mut flags = Flags::empty();
504 504 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
505 505 flags.set(Flags::P1_TRACKED, p1_tracked);
506 506 flags.set(Flags::P2_INFO, p2_info);
507 507 let size = if let Some((m, s)) = mode_size_opt {
508 let exec_perm = m & libc::S_IXUSR != 0;
509 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
508 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
509 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
510 510 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
511 511 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
512 512 flags.insert(Flags::HAS_MODE_AND_SIZE);
513 513 s.into()
514 514 } else {
515 515 0.into()
516 516 };
517 517 let mtime = if let Some(m) = mtime_opt {
518 518 flags.insert(Flags::HAS_MTIME);
519 519 if m.second_ambiguous {
520 520 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
521 521 };
522 522 m.into()
523 523 } else {
524 524 PackedTruncatedTimestamp::null()
525 525 };
526 526 if let Some(f_exec) = fallback_exec {
527 527 flags.insert(Flags::HAS_FALLBACK_EXEC);
528 528 if f_exec {
529 529 flags.insert(Flags::FALLBACK_EXEC);
530 530 }
531 531 }
532 532 if let Some(f_symlink) = fallback_symlink {
533 533 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
534 534 if f_symlink {
535 535 flags.insert(Flags::FALLBACK_SYMLINK);
536 536 }
537 537 }
538 538 (flags, size, mtime)
539 539 }
540 540 }
541 541
542 542 fn read_hg_path(
543 543 on_disk: &[u8],
544 544 slice: PathSlice,
545 545 ) -> Result<&HgPath, DirstateV2ParseError> {
546 546 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
547 547 }
548 548
549 549 fn read_nodes(
550 550 on_disk: &[u8],
551 551 slice: ChildNodes,
552 552 ) -> Result<&[Node], DirstateV2ParseError> {
553 553 read_slice(on_disk, slice.start, slice.len.get())
554 554 }
555 555
556 556 fn read_slice<T, Len>(
557 557 on_disk: &[u8],
558 558 start: Offset,
559 559 len: Len,
560 560 ) -> Result<&[T], DirstateV2ParseError>
561 561 where
562 562 T: BytesCast,
563 563 Len: TryInto<usize>,
564 564 {
565 565 // Either `usize::MAX` would result in "out of bounds" error since a single
566 566 // `&[u8]` cannot occupy the entire addess space.
567 567 let start = start.get().try_into().unwrap_or(std::usize::MAX);
568 568 let len = len.try_into().unwrap_or(std::usize::MAX);
569 569 on_disk
570 570 .get(start..)
571 571 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
572 572 .map(|(slice, _rest)| slice)
573 573 .ok_or_else(|| DirstateV2ParseError)
574 574 }
575 575
576 576 pub(crate) fn for_each_tracked_path<'on_disk>(
577 577 on_disk: &'on_disk [u8],
578 578 metadata: &[u8],
579 579 mut f: impl FnMut(&'on_disk HgPath),
580 580 ) -> Result<(), DirstateV2ParseError> {
581 581 let (meta, _) = TreeMetadata::from_bytes(metadata)
582 582 .map_err(|_| DirstateV2ParseError)?;
583 583 fn recur<'on_disk>(
584 584 on_disk: &'on_disk [u8],
585 585 nodes: ChildNodes,
586 586 f: &mut impl FnMut(&'on_disk HgPath),
587 587 ) -> Result<(), DirstateV2ParseError> {
588 588 for node in read_nodes(on_disk, nodes)? {
589 589 if let Some(entry) = node.entry()? {
590 590 if entry.state().is_tracked() {
591 591 f(node.full_path(on_disk)?)
592 592 }
593 593 }
594 594 recur(on_disk, node.children, f)?
595 595 }
596 596 Ok(())
597 597 }
598 598 recur(on_disk, meta.root_nodes, &mut f)
599 599 }
600 600
601 601 /// Returns new data and metadata, together with whether that data should be
602 602 /// appended to the existing data file whose content is at
603 603 /// `dirstate_map.on_disk` (true), instead of written to a new data file
604 604 /// (false).
605 605 pub(super) fn write(
606 606 dirstate_map: &DirstateMap,
607 607 can_append: bool,
608 608 ) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> {
609 609 let append = can_append && dirstate_map.write_should_append();
610 610
611 611 // This ignores the space for paths, and for nodes without an entry.
612 612 // TODO: better estimate? Skip the `Vec` and write to a file directly?
613 613 let size_guess = std::mem::size_of::<Node>()
614 614 * dirstate_map.nodes_with_entry_count as usize;
615 615
616 616 let mut writer = Writer {
617 617 dirstate_map,
618 618 append,
619 619 out: Vec::with_capacity(size_guess),
620 620 };
621 621
622 622 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
623 623
624 624 let meta = TreeMetadata {
625 625 root_nodes,
626 626 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
627 627 nodes_with_copy_source_count: dirstate_map
628 628 .nodes_with_copy_source_count
629 629 .into(),
630 630 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
631 631 unused: [0; 4],
632 632 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
633 633 };
634 634 Ok((writer.out, meta, append))
635 635 }
636 636
/// State of one serialization pass started by `write`.
637 637 struct Writer<'dmap, 'on_disk> {
/// The map being serialized
638 638 dirstate_map: &'dmap DirstateMap<'on_disk>,
/// When true, offsets count from the start of the existing data file and
/// nodes and paths already within `dirstate_map.on_disk` are reused
639 639 append: bool,
/// Bytes produced so far
640 640 out: Vec<u8>,
641 641 }
642 642
643 643 impl Writer<'_, '_> {
644 644 fn write_nodes(
645 645 &mut self,
646 646 nodes: dirstate_map::ChildNodesRef,
647 647 ) -> Result<ChildNodes, DirstateError> {
648 648 // Reuse already-written nodes if possible
649 649 if self.append {
650 650 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
651 651 let start = self.on_disk_offset_of(nodes_slice).expect(
652 652 "dirstate-v2 OnDisk nodes not found within on_disk",
653 653 );
654 654 let len = child_nodes_len_from_usize(nodes_slice.len());
655 655 return Ok(ChildNodes { start, len });
656 656 }
657 657 }
658 658
659 659 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
660 660 // undefined iteration order. Sort to enable binary search in the
661 661 // written file.
662 662 let nodes = nodes.sorted();
663 663 let nodes_len = nodes.len();
664 664
665 665 // First accumulate serialized nodes in a `Vec`
666 666 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
667 667 for node in nodes {
668 668 let children =
669 669 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
670 670 let full_path = node.full_path(self.dirstate_map.on_disk)?;
671 671 let full_path = self.write_path(full_path.as_bytes());
672 672 let copy_source = if let Some(source) =
673 673 node.copy_source(self.dirstate_map.on_disk)?
674 674 {
675 675 self.write_path(source.as_bytes())
676 676 } else {
677 677 PathSlice {
678 678 start: 0.into(),
679 679 len: 0.into(),
680 680 }
681 681 };
682 682 on_disk_nodes.push(match node {
683 683 NodeRef::InMemory(path, node) => {
684 684 let (flags, size, mtime) = match &node.data {
685 685 dirstate_map::NodeData::Entry(entry) => {
686 686 Node::from_dirstate_entry(entry)
687 687 }
688 688 dirstate_map::NodeData::CachedDirectory { mtime } => {
689 689 // we currently never set a mtime if unknown file
690 690 // are present.
691 691 // So if we have a mtime for a directory, we know
692 692 // they are no unknown
693 693 // files and we
694 694 // blindly set ALL_UNKNOWN_RECORDED.
695 695 //
696 696 // We never set ALL_IGNORED_RECORDED since we
697 697 // don't track that case
698 698 // currently.
699 699 let mut flags = Flags::DIRECTORY
700 700 | Flags::HAS_MTIME
701 701 | Flags::ALL_UNKNOWN_RECORDED;
702 702 if mtime.second_ambiguous {
703 703 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
704 704 }
705 705 (flags, 0.into(), (*mtime).into())
706 706 }
707 707 dirstate_map::NodeData::None => (
708 708 Flags::DIRECTORY,
709 709 0.into(),
710 710 PackedTruncatedTimestamp::null(),
711 711 ),
712 712 };
713 713 Node {
714 714 children,
715 715 copy_source,
716 716 full_path,
717 717 base_name_start: u16::try_from(path.base_name_start())
718 718 // Could only panic for paths over 64 KiB
719 719 .expect("dirstate-v2 path length overflow")
720 720 .into(),
721 721 descendants_with_entry_count: node
722 722 .descendants_with_entry_count
723 723 .into(),
724 724 tracked_descendants_count: node
725 725 .tracked_descendants_count
726 726 .into(),
727 727 flags: flags.bits().into(),
728 728 size,
729 729 mtime,
730 730 }
731 731 }
732 732 NodeRef::OnDisk(node) => Node {
733 733 children,
734 734 copy_source,
735 735 full_path,
736 736 ..*node
737 737 },
738 738 })
739 739 }
740 740 // … so we can write them contiguously, after writing everything else
741 741 // they refer to.
742 742 let start = self.current_offset();
743 743 let len = child_nodes_len_from_usize(nodes_len);
744 744 self.out.extend(on_disk_nodes.as_bytes());
745 745 Ok(ChildNodes { start, len })
746 746 }
747 747
748 748 /// If the given slice of items is within `on_disk`, returns its offset
749 749 /// from the start of `on_disk`.
750 750 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
751 751 where
752 752 T: BytesCast,
753 753 {
754 754 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
755 755 let start = slice.as_ptr() as usize;
756 756 let end = start + slice.len();
757 757 start..=end
758 758 }
759 759 let slice_addresses = address_range(slice.as_bytes());
760 760 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
761 761 if on_disk_addresses.contains(slice_addresses.start())
762 762 && on_disk_addresses.contains(slice_addresses.end())
763 763 {
764 764 let offset = slice_addresses.start() - on_disk_addresses.start();
765 765 Some(offset_from_usize(offset))
766 766 } else {
767 767 None
768 768 }
769 769 }
770 770
771 771 fn current_offset(&mut self) -> Offset {
772 772 let mut offset = self.out.len();
773 773 if self.append {
774 774 offset += self.dirstate_map.on_disk.len()
775 775 }
776 776 offset_from_usize(offset)
777 777 }
778 778
779 779 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
780 780 let len = path_len_from_usize(slice.len());
781 781 // Reuse an already-written path if possible
782 782 if self.append {
783 783 if let Some(start) = self.on_disk_offset_of(slice) {
784 784 return PathSlice { start, len };
785 785 }
786 786 }
787 787 let start = self.current_offset();
788 788 self.out.extend(slice.as_bytes());
789 789 PathSlice { start, len }
790 790 }
791 791 }
792 792
793 793 fn offset_from_usize(x: usize) -> Offset {
794 794 u32::try_from(x)
795 795 // Could only panic for a dirstate file larger than 4 GiB
796 796 .expect("dirstate-v2 offset overflow")
797 797 .into()
798 798 }
799 799
800 800 fn child_nodes_len_from_usize(x: usize) -> Size {
801 801 u32::try_from(x)
802 802 // Could only panic with over 4 billion nodes
803 803 .expect("dirstate-v2 slice length overflow")
804 804 .into()
805 805 }
806 806
807 807 fn path_len_from_usize(x: usize) -> PathSize {
808 808 u16::try_from(x)
809 809 // Could only panic for paths over 64 KiB
810 810 .expect("dirstate-v2 path length overflow")
811 811 .into()
812 812 }
813 813
814 814 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
815 815 fn from(timestamp: TruncatedTimestamp) -> Self {
816 816 Self {
817 817 truncated_seconds: timestamp.truncated_seconds().into(),
818 818 nanoseconds: timestamp.nanoseconds().into(),
819 819 }
820 820 }
821 821 }
822 822
823 823 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
824 824 type Error = DirstateV2ParseError;
825 825
826 826 fn try_from(
827 827 timestamp: PackedTruncatedTimestamp,
828 828 ) -> Result<Self, Self::Error> {
829 829 Self::from_already_truncated(
830 830 timestamp.truncated_seconds.get(),
831 831 timestamp.nanoseconds.get(),
832 832 false,
833 833 )
834 834 }
835 835 }
836 836 impl PackedTruncatedTimestamp {
837 837 fn null() -> Self {
838 838 Self {
839 839 truncated_seconds: 0.into(),
840 840 nanoseconds: 0.into(),
841 841 }
842 842 }
843 843 }
General Comments 0
You need to be logged in to leave comments. Login now