##// END OF EJS Templates
rust-dirstate: trace append/no append to help debugging
Raphaël Gomès -
r51074:f2e13d8d stable
parent child Browse files
Show More
@@ -1,878 +1,883
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 7 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
8 8 use crate::dirstate_tree::path_with_basename::WithBasename;
9 9 use crate::errors::HgError;
10 10 use crate::utils::hg_path::HgPath;
11 11 use crate::DirstateEntry;
12 12 use crate::DirstateError;
13 13 use crate::DirstateParents;
14 14 use bitflags::bitflags;
15 15 use bytes_cast::unaligned::{U16Be, U32Be};
16 16 use bytes_cast::BytesCast;
17 17 use format_bytes::format_bytes;
18 18 use rand::Rng;
19 19 use std::borrow::Cow;
20 20 use std::convert::{TryFrom, TryInto};
21 21 use std::fmt::Write;
22 22
23 23 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
24 24 /// This a redundant sanity check more than an actual "magic number" since
25 25 /// `.hg/requires` already governs which format should be used.
26 26 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
27 27
28 28 /// Keep space for 256-bit hashes
29 29 const STORED_NODE_ID_BYTES: usize = 32;
30 30
31 31 /// … even though only 160 bits are used for now, with SHA-1
32 32 const USED_NODE_ID_BYTES: usize = 20;
33 33
34 34 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
35 35 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
36 36
37 37 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
38 38 const TREE_METADATA_SIZE: usize = 44;
39 39 const NODE_SIZE: usize = 44;
40 40
41 41 /// Make sure that size-affecting changes are made knowingly
42 42 #[allow(unused)]
43 43 fn static_assert_size_of() {
44 44 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
45 45 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
46 46 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
47 47 }
48 48
49 49 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
50 50 #[derive(BytesCast)]
51 51 #[repr(C)]
52 52 struct DocketHeader {
53 53 marker: [u8; V2_FORMAT_MARKER.len()],
54 54 parent_1: [u8; STORED_NODE_ID_BYTES],
55 55 parent_2: [u8; STORED_NODE_ID_BYTES],
56 56
57 57 metadata: TreeMetadata,
58 58
59 59 /// Counted in bytes
60 60 data_size: Size,
61 61
62 62 uuid_size: u8,
63 63 }
64 64
65 65 pub struct Docket<'on_disk> {
66 66 header: &'on_disk DocketHeader,
67 67 pub uuid: &'on_disk [u8],
68 68 }
69 69
70 70 /// Fields are documented in the *Tree metadata in the docket file*
71 71 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
72 72 #[derive(BytesCast)]
73 73 #[repr(C)]
74 74 pub struct TreeMetadata {
75 75 root_nodes: ChildNodes,
76 76 nodes_with_entry_count: Size,
77 77 nodes_with_copy_source_count: Size,
78 78 unreachable_bytes: Size,
79 79 unused: [u8; 4],
80 80
81 81 /// See *Optional hash of ignore patterns* section of
82 82 /// `mercurial/helptext/internals/dirstate-v2.txt`
83 83 ignore_patterns_hash: IgnorePatternsHash,
84 84 }
85 85
86 86 /// Fields are documented in the *The data file format*
87 87 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
88 88 #[derive(BytesCast, Debug)]
89 89 #[repr(C)]
90 90 pub(super) struct Node {
91 91 full_path: PathSlice,
92 92
93 93 /// In bytes from `self.full_path.start`
94 94 base_name_start: PathSize,
95 95
96 96 copy_source: OptPathSlice,
97 97 children: ChildNodes,
98 98 pub(super) descendants_with_entry_count: Size,
99 99 pub(super) tracked_descendants_count: Size,
100 100 flags: U16Be,
101 101 size: U32Be,
102 102 mtime: PackedTruncatedTimestamp,
103 103 }
104 104
105 105 bitflags! {
106 106 #[repr(C)]
107 107 struct Flags: u16 {
108 108 const WDIR_TRACKED = 1 << 0;
109 109 const P1_TRACKED = 1 << 1;
110 110 const P2_INFO = 1 << 2;
111 111 const MODE_EXEC_PERM = 1 << 3;
112 112 const MODE_IS_SYMLINK = 1 << 4;
113 113 const HAS_FALLBACK_EXEC = 1 << 5;
114 114 const FALLBACK_EXEC = 1 << 6;
115 115 const HAS_FALLBACK_SYMLINK = 1 << 7;
116 116 const FALLBACK_SYMLINK = 1 << 8;
117 117 const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
118 118 const HAS_MODE_AND_SIZE = 1 <<10;
119 119 const HAS_MTIME = 1 <<11;
120 120 const MTIME_SECOND_AMBIGUOUS = 1 << 12;
121 121 const DIRECTORY = 1 <<13;
122 122 const ALL_UNKNOWN_RECORDED = 1 <<14;
123 123 const ALL_IGNORED_RECORDED = 1 <<15;
124 124 }
125 125 }
126 126
127 127 /// Duration since the Unix epoch
128 128 #[derive(BytesCast, Copy, Clone, Debug)]
129 129 #[repr(C)]
130 130 struct PackedTruncatedTimestamp {
131 131 truncated_seconds: U32Be,
132 132 nanoseconds: U32Be,
133 133 }
134 134
135 135 /// Counted in bytes from the start of the file
136 136 ///
137 137 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
138 138 type Offset = U32Be;
139 139
140 140 /// Counted in number of items
141 141 ///
142 142 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
143 143 type Size = U32Be;
144 144
145 145 /// Counted in bytes
146 146 ///
147 147 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
148 148 type PathSize = U16Be;
149 149
150 150 /// A contiguous sequence of `len` times `Node`, representing the child nodes
151 151 /// of either some other node or of the repository root.
152 152 ///
153 153 /// Always sorted by ascending `full_path`, to allow binary search.
154 154 /// Since nodes with the same parent nodes also have the same parent path,
155 155 /// only the `base_name`s need to be compared during binary search.
156 156 #[derive(BytesCast, Copy, Clone, Debug)]
157 157 #[repr(C)]
158 158 struct ChildNodes {
159 159 start: Offset,
160 160 len: Size,
161 161 }
162 162
163 163 /// A `HgPath` of `len` bytes
164 164 #[derive(BytesCast, Copy, Clone, Debug)]
165 165 #[repr(C)]
166 166 struct PathSlice {
167 167 start: Offset,
168 168 len: PathSize,
169 169 }
170 170
171 171 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
172 172 type OptPathSlice = PathSlice;
173 173
/// Error for an unexpected file format found in `.hg/dirstate` with the
/// "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    /// Human-readable description of what went wrong
    message: String,
}

impl DirstateV2ParseError {
    /// Builds an error from anything convertible into a `String`.
    pub fn new<S: Into<String>>(message: S) -> Self {
        let message = message.into();
        Self { message }
    }
}
189 189
190 190 impl From<DirstateV2ParseError> for HgError {
191 191 fn from(e: DirstateV2ParseError) -> Self {
192 192 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
193 193 }
194 194 }
195 195
196 196 impl From<DirstateV2ParseError> for crate::DirstateError {
197 197 fn from(error: DirstateV2ParseError) -> Self {
198 198 HgError::from(error).into()
199 199 }
200 200 }
201 201
202 202 impl TreeMetadata {
203 203 pub fn as_bytes(&self) -> &[u8] {
204 204 BytesCast::as_bytes(self)
205 205 }
206 206 }
207 207
208 208 impl<'on_disk> Docket<'on_disk> {
209 209 /// Generate the identifier for a new data file
210 210 ///
211 211 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
212 212 /// See `mercurial/revlogutils/docket.py`
213 213 pub fn new_uid() -> String {
214 214 const ID_LENGTH: usize = 8;
215 215 let mut id = String::with_capacity(ID_LENGTH);
216 216 let mut rng = rand::thread_rng();
217 217 for _ in 0..ID_LENGTH {
218 218 // One random hexadecimal digit.
219 219 // `unwrap` never panics because `impl Write for String`
220 220 // never returns an error.
221 221 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
222 222 }
223 223 id
224 224 }
225 225
226 226 pub fn serialize(
227 227 parents: DirstateParents,
228 228 tree_metadata: TreeMetadata,
229 229 data_size: u64,
230 230 uuid: &[u8],
231 231 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
232 232 let header = DocketHeader {
233 233 marker: *V2_FORMAT_MARKER,
234 234 parent_1: parents.p1.pad_to_256_bits(),
235 235 parent_2: parents.p2.pad_to_256_bits(),
236 236 metadata: tree_metadata,
237 237 data_size: u32::try_from(data_size)?.into(),
238 238 uuid_size: uuid.len().try_into()?,
239 239 };
240 240 let header = header.as_bytes();
241 241 let mut docket = Vec::with_capacity(header.len() + uuid.len());
242 242 docket.extend_from_slice(header);
243 243 docket.extend_from_slice(uuid);
244 244 Ok(docket)
245 245 }
246 246
247 247 pub fn parents(&self) -> DirstateParents {
248 248 use crate::Node;
249 249 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
250 250 .unwrap()
251 251 .clone();
252 252 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
253 253 .unwrap()
254 254 .clone();
255 255 DirstateParents { p1, p2 }
256 256 }
257 257
258 258 pub fn tree_metadata(&self) -> &[u8] {
259 259 self.header.metadata.as_bytes()
260 260 }
261 261
262 262 pub fn data_size(&self) -> usize {
263 263 // This `unwrap` could only panic on a 16-bit CPU
264 264 self.header.data_size.get().try_into().unwrap()
265 265 }
266 266
267 267 pub fn data_filename(&self) -> String {
268 268 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
269 269 }
270 270 }
271 271
272 272 pub fn read_docket(
273 273 on_disk: &[u8],
274 274 ) -> Result<Docket<'_>, DirstateV2ParseError> {
275 275 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
276 276 DirstateV2ParseError::new(format!("when reading docket, {}", e))
277 277 })?;
278 278 let uuid_size = header.uuid_size as usize;
279 279 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
280 280 Ok(Docket { header, uuid })
281 281 } else {
282 282 Err(DirstateV2ParseError::new(
283 283 "invalid format marker or uuid size",
284 284 ))
285 285 }
286 286 }
287 287
288 288 pub(super) fn read<'on_disk>(
289 289 on_disk: &'on_disk [u8],
290 290 metadata: &[u8],
291 291 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
292 292 if on_disk.is_empty() {
293 293 let mut map = DirstateMap::empty(on_disk);
294 294 map.dirstate_version = DirstateVersion::V2;
295 295 return Ok(map);
296 296 }
297 297 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
298 298 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
299 299 })?;
300 300 let dirstate_map = DirstateMap {
301 301 on_disk,
302 302 root: dirstate_map::ChildNodes::OnDisk(
303 303 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
304 304 e.message = format!("{}, when reading root notes", e.message);
305 305 e
306 306 })?,
307 307 ),
308 308 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
309 309 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
310 310 ignore_patterns_hash: meta.ignore_patterns_hash,
311 311 unreachable_bytes: meta.unreachable_bytes.get(),
312 312 old_data_size: on_disk.len(),
313 313 dirstate_version: DirstateVersion::V2,
314 314 };
315 315 Ok(dirstate_map)
316 316 }
317 317
318 318 impl Node {
319 319 pub(super) fn full_path<'on_disk>(
320 320 &self,
321 321 on_disk: &'on_disk [u8],
322 322 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
323 323 read_hg_path(on_disk, self.full_path)
324 324 }
325 325
326 326 pub(super) fn base_name_start<'on_disk>(
327 327 &self,
328 328 ) -> Result<usize, DirstateV2ParseError> {
329 329 let start = self.base_name_start.get();
330 330 if start < self.full_path.len.get() {
331 331 let start = usize::try_from(start)
332 332 // u32 -> usize, could only panic on a 16-bit CPU
333 333 .expect("dirstate-v2 base_name_start out of bounds");
334 334 Ok(start)
335 335 } else {
336 336 Err(DirstateV2ParseError::new("not enough bytes for base name"))
337 337 }
338 338 }
339 339
340 340 pub(super) fn base_name<'on_disk>(
341 341 &self,
342 342 on_disk: &'on_disk [u8],
343 343 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
344 344 let full_path = self.full_path(on_disk)?;
345 345 let base_name_start = self.base_name_start()?;
346 346 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
347 347 }
348 348
349 349 pub(super) fn path<'on_disk>(
350 350 &self,
351 351 on_disk: &'on_disk [u8],
352 352 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
353 353 Ok(WithBasename::from_raw_parts(
354 354 Cow::Borrowed(self.full_path(on_disk)?),
355 355 self.base_name_start()?,
356 356 ))
357 357 }
358 358
359 359 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
360 360 self.copy_source.start.get() != 0
361 361 }
362 362
363 363 pub(super) fn copy_source<'on_disk>(
364 364 &self,
365 365 on_disk: &'on_disk [u8],
366 366 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
367 367 Ok(if self.has_copy_source() {
368 368 Some(read_hg_path(on_disk, self.copy_source)?)
369 369 } else {
370 370 None
371 371 })
372 372 }
373 373
374 374 fn flags(&self) -> Flags {
375 375 Flags::from_bits_truncate(self.flags.get())
376 376 }
377 377
378 378 fn has_entry(&self) -> bool {
379 379 self.flags().intersects(
380 380 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
381 381 )
382 382 }
383 383
384 384 pub(super) fn node_data(
385 385 &self,
386 386 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
387 387 if self.has_entry() {
388 388 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
389 389 } else if let Some(mtime) = self.cached_directory_mtime()? {
390 390 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
391 391 } else {
392 392 Ok(dirstate_map::NodeData::None)
393 393 }
394 394 }
395 395
396 396 pub(super) fn cached_directory_mtime(
397 397 &self,
398 398 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
399 399 // For now we do not have code to handle the absence of
400 400 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
401 401 // unset.
402 402 if self.flags().contains(Flags::DIRECTORY)
403 403 && self.flags().contains(Flags::HAS_MTIME)
404 404 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
405 405 {
406 406 Ok(Some(self.mtime()?))
407 407 } else {
408 408 Ok(None)
409 409 }
410 410 }
411 411
412 412 fn synthesize_unix_mode(&self) -> u32 {
413 413 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
414 414 libc::S_IFLNK
415 415 } else {
416 416 libc::S_IFREG
417 417 };
418 418 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
419 419 0o755
420 420 } else {
421 421 0o644
422 422 };
423 423 (file_type | permisions).into()
424 424 }
425 425
426 426 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
427 427 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
428 428 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
429 429 m.second_ambiguous = true;
430 430 }
431 431 Ok(m)
432 432 }
433 433
434 434 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
435 435 // TODO: convert through raw bits instead?
436 436 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
437 437 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
438 438 let p2_info = self.flags().contains(Flags::P2_INFO);
439 439 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
440 440 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
441 441 {
442 442 Some((self.synthesize_unix_mode(), self.size.into()))
443 443 } else {
444 444 None
445 445 };
446 446 let mtime = if self.flags().contains(Flags::HAS_MTIME)
447 447 && !self.flags().contains(Flags::DIRECTORY)
448 448 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
449 449 {
450 450 Some(self.mtime()?)
451 451 } else {
452 452 None
453 453 };
454 454 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
455 455 {
456 456 Some(self.flags().contains(Flags::FALLBACK_EXEC))
457 457 } else {
458 458 None
459 459 };
460 460 let fallback_symlink =
461 461 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
462 462 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
463 463 } else {
464 464 None
465 465 };
466 466 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
467 467 wc_tracked,
468 468 p1_tracked,
469 469 p2_info,
470 470 mode_size,
471 471 mtime,
472 472 fallback_exec,
473 473 fallback_symlink,
474 474 }))
475 475 }
476 476
477 477 pub(super) fn entry(
478 478 &self,
479 479 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
480 480 if self.has_entry() {
481 481 Ok(Some(self.assume_entry()?))
482 482 } else {
483 483 Ok(None)
484 484 }
485 485 }
486 486
487 487 pub(super) fn children<'on_disk>(
488 488 &self,
489 489 on_disk: &'on_disk [u8],
490 490 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
491 491 read_nodes(on_disk, self.children)
492 492 }
493 493
494 494 pub(super) fn to_in_memory_node<'on_disk>(
495 495 &self,
496 496 on_disk: &'on_disk [u8],
497 497 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
498 498 Ok(dirstate_map::Node {
499 499 children: dirstate_map::ChildNodes::OnDisk(
500 500 self.children(on_disk)?,
501 501 ),
502 502 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
503 503 data: self.node_data()?,
504 504 descendants_with_entry_count: self
505 505 .descendants_with_entry_count
506 506 .get(),
507 507 tracked_descendants_count: self.tracked_descendants_count.get(),
508 508 })
509 509 }
510 510
511 511 fn from_dirstate_entry(
512 512 entry: &DirstateEntry,
513 513 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
514 514 let DirstateV2Data {
515 515 wc_tracked,
516 516 p1_tracked,
517 517 p2_info,
518 518 mode_size: mode_size_opt,
519 519 mtime: mtime_opt,
520 520 fallback_exec,
521 521 fallback_symlink,
522 522 } = entry.v2_data();
523 523 // TODO: convert through raw flag bits instead?
524 524 let mut flags = Flags::empty();
525 525 flags.set(Flags::WDIR_TRACKED, wc_tracked);
526 526 flags.set(Flags::P1_TRACKED, p1_tracked);
527 527 flags.set(Flags::P2_INFO, p2_info);
528 528 let size = if let Some((m, s)) = mode_size_opt {
529 529 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
530 530 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
531 531 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
532 532 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
533 533 flags.insert(Flags::HAS_MODE_AND_SIZE);
534 534 s.into()
535 535 } else {
536 536 0.into()
537 537 };
538 538 let mtime = if let Some(m) = mtime_opt {
539 539 flags.insert(Flags::HAS_MTIME);
540 540 if m.second_ambiguous {
541 541 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
542 542 };
543 543 m.into()
544 544 } else {
545 545 PackedTruncatedTimestamp::null()
546 546 };
547 547 if let Some(f_exec) = fallback_exec {
548 548 flags.insert(Flags::HAS_FALLBACK_EXEC);
549 549 if f_exec {
550 550 flags.insert(Flags::FALLBACK_EXEC);
551 551 }
552 552 }
553 553 if let Some(f_symlink) = fallback_symlink {
554 554 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
555 555 if f_symlink {
556 556 flags.insert(Flags::FALLBACK_SYMLINK);
557 557 }
558 558 }
559 559 (flags, size, mtime)
560 560 }
561 561 }
562 562
563 563 fn read_hg_path(
564 564 on_disk: &[u8],
565 565 slice: PathSlice,
566 566 ) -> Result<&HgPath, DirstateV2ParseError> {
567 567 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
568 568 }
569 569
570 570 fn read_nodes(
571 571 on_disk: &[u8],
572 572 slice: ChildNodes,
573 573 ) -> Result<&[Node], DirstateV2ParseError> {
574 574 read_slice(on_disk, slice.start, slice.len.get())
575 575 }
576 576
577 577 fn read_slice<T, Len>(
578 578 on_disk: &[u8],
579 579 start: Offset,
580 580 len: Len,
581 581 ) -> Result<&[T], DirstateV2ParseError>
582 582 where
583 583 T: BytesCast,
584 584 Len: TryInto<usize>,
585 585 {
586 586 // Either `usize::MAX` would result in "out of bounds" error since a single
587 587 // `&[u8]` cannot occupy the entire addess space.
588 588 let start = start.get().try_into().unwrap_or(std::usize::MAX);
589 589 let len = len.try_into().unwrap_or(std::usize::MAX);
590 590 let bytes = match on_disk.get(start..) {
591 591 Some(bytes) => bytes,
592 592 None => {
593 593 return Err(DirstateV2ParseError::new(
594 594 "not enough bytes from disk",
595 595 ))
596 596 }
597 597 };
598 598 T::slice_from_bytes(bytes, len)
599 599 .map_err(|e| {
600 600 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
601 601 })
602 602 .map(|(slice, _rest)| slice)
603 603 }
604 604
605 605 pub(crate) fn for_each_tracked_path<'on_disk>(
606 606 on_disk: &'on_disk [u8],
607 607 metadata: &[u8],
608 608 mut f: impl FnMut(&'on_disk HgPath),
609 609 ) -> Result<(), DirstateV2ParseError> {
610 610 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
611 611 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
612 612 })?;
613 613 fn recur<'on_disk>(
614 614 on_disk: &'on_disk [u8],
615 615 nodes: ChildNodes,
616 616 f: &mut impl FnMut(&'on_disk HgPath),
617 617 ) -> Result<(), DirstateV2ParseError> {
618 618 for node in read_nodes(on_disk, nodes)? {
619 619 if let Some(entry) = node.entry()? {
620 620 if entry.tracked() {
621 621 f(node.full_path(on_disk)?)
622 622 }
623 623 }
624 624 recur(on_disk, node.children, f)?
625 625 }
626 626 Ok(())
627 627 }
628 628 recur(on_disk, meta.root_nodes, &mut f)
629 629 }
630 630
631 631 /// Returns new data and metadata, together with whether that data should be
632 632 /// appended to the existing data file whose content is at
633 633 /// `dirstate_map.on_disk` (true), instead of written to a new data file
634 634 /// (false), and the previous size of data on disk.
635 635 pub(super) fn write(
636 636 dirstate_map: &DirstateMap,
637 637 can_append: bool,
638 638 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
639 639 let append = can_append && dirstate_map.write_should_append();
640 if append {
641 log::trace!("appending to the dirstate data file");
642 } else {
643 log::trace!("creating new dirstate data file");
644 }
640 645
641 646 // This ignores the space for paths, and for nodes without an entry.
642 647 // TODO: better estimate? Skip the `Vec` and write to a file directly?
643 648 let size_guess = std::mem::size_of::<Node>()
644 649 * dirstate_map.nodes_with_entry_count as usize;
645 650
646 651 let mut writer = Writer {
647 652 dirstate_map,
648 653 append,
649 654 out: Vec::with_capacity(size_guess),
650 655 };
651 656
652 657 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
653 658
654 659 let unreachable_bytes = if append {
655 660 dirstate_map.unreachable_bytes
656 661 } else {
657 662 0
658 663 };
659 664 let meta = TreeMetadata {
660 665 root_nodes,
661 666 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
662 667 nodes_with_copy_source_count: dirstate_map
663 668 .nodes_with_copy_source_count
664 669 .into(),
665 670 unreachable_bytes: unreachable_bytes.into(),
666 671 unused: [0; 4],
667 672 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
668 673 };
669 674 Ok((writer.out, meta, append, dirstate_map.old_data_size))
670 675 }
671 676
672 677 struct Writer<'dmap, 'on_disk> {
673 678 dirstate_map: &'dmap DirstateMap<'on_disk>,
674 679 append: bool,
675 680 out: Vec<u8>,
676 681 }
677 682
678 683 impl Writer<'_, '_> {
679 684 fn write_nodes(
680 685 &mut self,
681 686 nodes: dirstate_map::ChildNodesRef,
682 687 ) -> Result<ChildNodes, DirstateError> {
683 688 // Reuse already-written nodes if possible
684 689 if self.append {
685 690 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
686 691 let start = self.on_disk_offset_of(nodes_slice).expect(
687 692 "dirstate-v2 OnDisk nodes not found within on_disk",
688 693 );
689 694 let len = child_nodes_len_from_usize(nodes_slice.len());
690 695 return Ok(ChildNodes { start, len });
691 696 }
692 697 }
693 698
694 699 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
695 700 // undefined iteration order. Sort to enable binary search in the
696 701 // written file.
697 702 let nodes = nodes.sorted();
698 703 let nodes_len = nodes.len();
699 704
700 705 // First accumulate serialized nodes in a `Vec`
701 706 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
702 707 for node in nodes {
703 708 let children =
704 709 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
705 710 let full_path = node.full_path(self.dirstate_map.on_disk)?;
706 711 let full_path = self.write_path(full_path.as_bytes());
707 712 let copy_source = if let Some(source) =
708 713 node.copy_source(self.dirstate_map.on_disk)?
709 714 {
710 715 self.write_path(source.as_bytes())
711 716 } else {
712 717 PathSlice {
713 718 start: 0.into(),
714 719 len: 0.into(),
715 720 }
716 721 };
717 722 on_disk_nodes.push(match node {
718 723 NodeRef::InMemory(path, node) => {
719 724 let (flags, size, mtime) = match &node.data {
720 725 dirstate_map::NodeData::Entry(entry) => {
721 726 Node::from_dirstate_entry(entry)
722 727 }
723 728 dirstate_map::NodeData::CachedDirectory { mtime } => {
724 729 // we currently never set a mtime if unknown file
725 730 // are present.
726 731 // So if we have a mtime for a directory, we know
727 732 // they are no unknown
728 733 // files and we
729 734 // blindly set ALL_UNKNOWN_RECORDED.
730 735 //
731 736 // We never set ALL_IGNORED_RECORDED since we
732 737 // don't track that case
733 738 // currently.
734 739 let mut flags = Flags::DIRECTORY
735 740 | Flags::HAS_MTIME
736 741 | Flags::ALL_UNKNOWN_RECORDED;
737 742 if mtime.second_ambiguous {
738 743 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
739 744 }
740 745 (flags, 0.into(), (*mtime).into())
741 746 }
742 747 dirstate_map::NodeData::None => (
743 748 Flags::DIRECTORY,
744 749 0.into(),
745 750 PackedTruncatedTimestamp::null(),
746 751 ),
747 752 };
748 753 Node {
749 754 children,
750 755 copy_source,
751 756 full_path,
752 757 base_name_start: u16::try_from(path.base_name_start())
753 758 // Could only panic for paths over 64 KiB
754 759 .expect("dirstate-v2 path length overflow")
755 760 .into(),
756 761 descendants_with_entry_count: node
757 762 .descendants_with_entry_count
758 763 .into(),
759 764 tracked_descendants_count: node
760 765 .tracked_descendants_count
761 766 .into(),
762 767 flags: flags.bits().into(),
763 768 size,
764 769 mtime,
765 770 }
766 771 }
767 772 NodeRef::OnDisk(node) => Node {
768 773 children,
769 774 copy_source,
770 775 full_path,
771 776 ..*node
772 777 },
773 778 })
774 779 }
775 780 // … so we can write them contiguously, after writing everything else
776 781 // they refer to.
777 782 let start = self.current_offset();
778 783 let len = child_nodes_len_from_usize(nodes_len);
779 784 self.out.extend(on_disk_nodes.as_bytes());
780 785 Ok(ChildNodes { start, len })
781 786 }
782 787
783 788 /// If the given slice of items is within `on_disk`, returns its offset
784 789 /// from the start of `on_disk`.
785 790 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
786 791 where
787 792 T: BytesCast,
788 793 {
789 794 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
790 795 let start = slice.as_ptr() as usize;
791 796 let end = start + slice.len();
792 797 start..=end
793 798 }
794 799 let slice_addresses = address_range(slice.as_bytes());
795 800 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
796 801 if on_disk_addresses.contains(slice_addresses.start())
797 802 && on_disk_addresses.contains(slice_addresses.end())
798 803 {
799 804 let offset = slice_addresses.start() - on_disk_addresses.start();
800 805 Some(offset_from_usize(offset))
801 806 } else {
802 807 None
803 808 }
804 809 }
805 810
806 811 fn current_offset(&mut self) -> Offset {
807 812 let mut offset = self.out.len();
808 813 if self.append {
809 814 offset += self.dirstate_map.on_disk.len()
810 815 }
811 816 offset_from_usize(offset)
812 817 }
813 818
814 819 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
815 820 let len = path_len_from_usize(slice.len());
816 821 // Reuse an already-written path if possible
817 822 if self.append {
818 823 if let Some(start) = self.on_disk_offset_of(slice) {
819 824 return PathSlice { start, len };
820 825 }
821 826 }
822 827 let start = self.current_offset();
823 828 self.out.extend(slice.as_bytes());
824 829 PathSlice { start, len }
825 830 }
826 831 }
827 832
828 833 fn offset_from_usize(x: usize) -> Offset {
829 834 u32::try_from(x)
830 835 // Could only panic for a dirstate file larger than 4 GiB
831 836 .expect("dirstate-v2 offset overflow")
832 837 .into()
833 838 }
834 839
835 840 fn child_nodes_len_from_usize(x: usize) -> Size {
836 841 u32::try_from(x)
837 842 // Could only panic with over 4 billion nodes
838 843 .expect("dirstate-v2 slice length overflow")
839 844 .into()
840 845 }
841 846
842 847 fn path_len_from_usize(x: usize) -> PathSize {
843 848 u16::try_from(x)
844 849 // Could only panic for paths over 64 KiB
845 850 .expect("dirstate-v2 path length overflow")
846 851 .into()
847 852 }
848 853
849 854 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
850 855 fn from(timestamp: TruncatedTimestamp) -> Self {
851 856 Self {
852 857 truncated_seconds: timestamp.truncated_seconds().into(),
853 858 nanoseconds: timestamp.nanoseconds().into(),
854 859 }
855 860 }
856 861 }
857 862
858 863 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
859 864 type Error = DirstateV2ParseError;
860 865
861 866 fn try_from(
862 867 timestamp: PackedTruncatedTimestamp,
863 868 ) -> Result<Self, Self::Error> {
864 869 Self::from_already_truncated(
865 870 timestamp.truncated_seconds.get(),
866 871 timestamp.nanoseconds.get(),
867 872 false,
868 873 )
869 874 }
870 875 }
871 876 impl PackedTruncatedTimestamp {
872 877 fn null() -> Self {
873 878 Self {
874 879 truncated_seconds: 0.into(),
875 880 nanoseconds: 0.into(),
876 881 }
877 882 }
878 883 }
General Comments 0
You need to be logged in to leave comments. Login now