##// END OF EJS Templates
dirstate-v2: Support appending to the same data file...
Simon Sapin -
r48478:065e6162 default
parent child Browse files
Show More
@@ -655,13 +655,41 b' if rustmod is not None:'
655 655 return self._rustmap
656 656
657 657 def write(self, tr, st, now):
658 if self._use_dirstate_v2:
659 packed = self._rustmap.write_v2(now)
658 if not self._use_dirstate_v2:
659 p1, p2 = self.parents()
660 packed = self._rustmap.write_v1(p1, p2, now)
661 st.write(packed)
662 st.close()
663 self._dirtyparents = False
664 return
665
666 # We can only append to an existing data file if there is one
667 can_append = self.docket.uuid is not None
668 packed, append = self._rustmap.write_v2(now, can_append)
669 if append:
670 docket = self.docket
671 data_filename = docket.data_filename()
672 if tr:
673 tr.add(data_filename, docket.data_size)
674 with self._opener(data_filename, b'r+b') as fp:
675 fp.seek(docket.data_size)
676 assert fp.tell() == docket.data_size
677 written = fp.write(packed)
678 if written is not None: # py2 may return None
679 assert written == len(packed), (written, len(packed))
680 docket.data_size += len(packed)
681 docket.parents = self.parents()
682 st.write(docket.serialize())
683 st.close()
684 else:
660 685 old_docket = self.docket
661 686 new_docket = docketmod.DirstateDocket.with_new_uuid(
662 687 self.parents(), len(packed)
663 688 )
664 self._opener.write(new_docket.data_filename(), packed)
689 data_filename = new_docket.data_filename()
690 if tr:
691 tr.add(data_filename, 0)
692 self._opener.write(data_filename, packed)
665 693 # Write the new docket after the new data file has been
666 694 # written. Because `st` was opened with `atomictemp=True`,
667 695 # the actual `.hg/dirstate` file is only affected on close.
@@ -670,13 +698,16 b' if rustmod is not None:'
670 698 # Remove the old data file after the new docket pointing to
671 699 # the new data file was written.
672 700 if old_docket.uuid:
673 self._opener.unlink(old_docket.data_filename())
701 data_filename = old_docket.data_filename()
702 unlink = lambda _tr=None: self._opener.unlink(data_filename)
703 if tr:
704 category = b"dirstate-v2-clean-" + old_docket.uuid
705 tr.addpostclose(category, unlink)
706 else:
707 unlink()
674 708 self._docket = new_docket
675 else:
676 p1, p2 = self.parents()
677 packed = self._rustmap.write_v1(p1, p2, now)
678 st.write(packed)
679 st.close()
709 # Reload from the newly-written file
710 util.clearcachedproperty(self, b"_rustmap")
680 711 self._dirtyparents = False
681 712
682 713 @propertycache
@@ -468,6 +468,24 b" impl<'on_disk> DirstateMap<'on_disk> {"
468 468 Ok((map, parents))
469 469 }
470 470
471 /// Assuming dirstate-v2 format, returns whether the next write should
472 /// append to the existing data file that contains `self.on_disk` (true),
473 /// or create a new data file from scratch (false).
474 pub(super) fn write_should_append(&self) -> bool {
475 // Soon this will be a heuristic based on the amount of unreachable
476 // data. For now it’s pseudo-random in order to make tests exercise
477 // both code paths.
478
479 fn bad_rng() -> u32 {
480 std::time::SystemTime::now()
481 .duration_since(std::time::UNIX_EPOCH)
482 .unwrap()
483 .subsec_millis()
484 }
485
486 bad_rng() % 2 == 0
487 }
488
471 489 fn get_node<'tree>(
472 490 &'tree self,
473 491 path: &HgPath,
@@ -1043,8 +1061,15 b" impl<'on_disk> super::dispatch::Dirstate"
1043 1061 Ok(packed)
1044 1062 }
1045 1063
1064 /// Returns new data together with whether that data should be appended to
1065 /// the existing data file whose content is at `self.on_disk` (true),
1066 /// instead of written to a new data file (false).
1046 1067 #[timed]
1047 fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
1068 fn pack_v2(
1069 &mut self,
1070 now: Timestamp,
1071 can_append: bool,
1072 ) -> Result<(Vec<u8>, bool), DirstateError> {
1048 1073 // TODO: how do we want to handle this in 2038?
1049 1074 let now: i32 = now.0.try_into().expect("time overflow");
1050 1075 let mut paths = Vec::new();
@@ -1063,7 +1088,7 b" impl<'on_disk> super::dispatch::Dirstate"
1063 1088
1064 1089 self.clear_known_ambiguous_mtimes(&paths)?;
1065 1090
1066 on_disk::write(self)
1091 on_disk::write(self, can_append)
1067 1092 }
1068 1093
1069 1094 fn status<'a>(
@@ -179,11 +179,19 b' pub trait DirstateMapMethods {'
179 179
180 180 /// Clear mtimes that are ambiguous with `now` (similar to
181 181 /// `clear_ambiguous_times` but for all files in the dirstate map), and
182 /// serialize bytes to write the `.hg/dirstate` file to disk in dirstate-v2
182 /// serialize bytes to write a dirstate data file to disk in dirstate-v2
183 183 /// format.
184 184 ///
185 /// Returns new data together with whether that data should be appended to
186 /// the existing data file whose content is at `self.on_disk` (true),
187 /// instead of written to a new data file (false).
188 ///
185 189 /// Note: this is only supported by the tree dirstate map.
186 fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError>;
190 fn pack_v2(
191 &mut self,
192 now: Timestamp,
193 can_append: bool,
194 ) -> Result<(Vec<u8>, bool), DirstateError>;
187 195
188 196 /// Run the status algorithm.
189 197 ///
@@ -383,7 +391,11 b' impl DirstateMapMethods for DirstateMap '
383 391 self.pack(parents, now)
384 392 }
385 393
386 fn pack_v2(&mut self, _now: Timestamp) -> Result<Vec<u8>, DirstateError> {
394 fn pack_v2(
395 &mut self,
396 _now: Timestamp,
397 _can_append: bool,
398 ) -> Result<(Vec<u8>, bool), DirstateError> {
387 399 panic!(
388 400 "should have used dirstate_tree::DirstateMap to use the v2 format"
389 401 )
@@ -544,20 +544,28 b" pub(crate) fn for_each_tracked_path<'on_"
544 544 recur(on_disk, root.root_nodes, &mut f)
545 545 }
546 546
547 /// Returns new data together with whether that data should be appended to the
548 /// existing data file whose content is at `dirstate_map.on_disk` (true),
549 /// instead of written to a new data file (false).
547 550 pub(super) fn write(
548 551 dirstate_map: &mut DirstateMap,
549 ) -> Result<Vec<u8>, DirstateError> {
550 let root_len = std::mem::size_of::<Root>();
552 can_append: bool,
553 ) -> Result<(Vec<u8>, bool), DirstateError> {
554 let append = can_append && dirstate_map.write_should_append();
551 555
552 556 // This ignores the space for paths, and for nodes without an entry.
553 557 // TODO: better estimate? Skip the `Vec` and write to a file directly?
554 let size_guess = root_len
558 let size_guess = std::mem::size_of::<Root>()
555 559 + std::mem::size_of::<Node>()
556 560 * dirstate_map.nodes_with_entry_count as usize;
557 let mut out = Vec::with_capacity(size_guess);
558 561
559 let root_nodes =
560 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
562 let mut writer = Writer {
563 dirstate_map,
564 append,
565 out: Vec::with_capacity(size_guess),
566 };
567
568 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
561 569
562 570 let root = Root {
563 571 root_nodes,
@@ -567,112 +575,121 b' pub(super) fn write('
567 575 .into(),
568 576 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
569 577 };
570 out.extend(root.as_bytes());
571 Ok(out)
578 writer.out.extend(root.as_bytes());
579 Ok((writer.out, append))
580 }
581
582 struct Writer<'dmap, 'on_disk> {
583 dirstate_map: &'dmap DirstateMap<'on_disk>,
584 append: bool,
585 out: Vec<u8>,
572 586 }
573 587
574 fn write_nodes(
575 dirstate_map: &DirstateMap,
576 nodes: dirstate_map::ChildNodesRef,
577 out: &mut Vec<u8>,
578 ) -> Result<ChildNodes, DirstateError> {
579 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
580 // order. Sort to enable binary search in the written file.
581 let nodes = nodes.sorted();
582 let nodes_len = nodes.len();
588 impl Writer<'_, '_> {
589 fn write_nodes(
590 &mut self,
591 nodes: dirstate_map::ChildNodesRef,
592 ) -> Result<ChildNodes, DirstateError> {
593 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
594 // order. Sort to enable binary search in the written file.
595 let nodes = nodes.sorted();
596 let nodes_len = nodes.len();
583 597
584 // First accumulate serialized nodes in a `Vec`
585 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
586 for node in nodes {
587 let children = write_nodes(
588 dirstate_map,
589 node.children(dirstate_map.on_disk)?,
590 out,
591 )?;
592 let full_path = node.full_path(dirstate_map.on_disk)?;
593 let full_path = write_path(full_path.as_bytes(), out);
594 let copy_source =
595 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
596 write_path(source.as_bytes(), out)
598 // First accumulate serialized nodes in a `Vec`
599 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
600 for node in nodes {
601 let children =
602 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
603 let full_path = node.full_path(self.dirstate_map.on_disk)?;
604 let full_path = self.write_path(full_path.as_bytes());
605 let copy_source = if let Some(source) =
606 node.copy_source(self.dirstate_map.on_disk)?
607 {
608 self.write_path(source.as_bytes())
597 609 } else {
598 610 PathSlice {
599 611 start: 0.into(),
600 612 len: 0.into(),
601 613 }
602 614 };
603 on_disk_nodes.push(match node {
604 NodeRef::InMemory(path, node) => {
605 let (state, data) = match &node.data {
606 dirstate_map::NodeData::Entry(entry) => (
607 entry.state.into(),
608 Entry {
609 mode: entry.mode.into(),
610 mtime: entry.mtime.into(),
611 size: entry.size.into(),
612 },
613 ),
614 dirstate_map::NodeData::CachedDirectory { mtime } => {
615 (b'd', Entry::from_timestamp(*mtime))
615 on_disk_nodes.push(match node {
616 NodeRef::InMemory(path, node) => {
617 let (state, data) = match &node.data {
618 dirstate_map::NodeData::Entry(entry) => (
619 entry.state.into(),
620 Entry {
621 mode: entry.mode.into(),
622 mtime: entry.mtime.into(),
623 size: entry.size.into(),
624 },
625 ),
626 dirstate_map::NodeData::CachedDirectory { mtime } => {
627 (b'd', Entry::from_timestamp(*mtime))
628 }
629 dirstate_map::NodeData::None => (
630 b'\0',
631 Entry {
632 mode: 0.into(),
633 mtime: 0.into(),
634 size: 0.into(),
635 },
636 ),
637 };
638 Node {
639 children,
640 copy_source,
641 full_path,
642 base_name_start: u16::try_from(path.base_name_start())
643 // Could only panic for paths over 64 KiB
644 .expect("dirstate-v2 path length overflow")
645 .into(),
646 descendants_with_entry_count: node
647 .descendants_with_entry_count
648 .into(),
649 tracked_descendants_count: node
650 .tracked_descendants_count
651 .into(),
652 state,
653 data,
616 654 }
617 dirstate_map::NodeData::None => (
618 b'\0',
619 Entry {
620 mode: 0.into(),
621 mtime: 0.into(),
622 size: 0.into(),
623 },
624 ),
625 };
626 Node {
655 }
656 NodeRef::OnDisk(node) => Node {
627 657 children,
628 658 copy_source,
629 659 full_path,
630 base_name_start: u16::try_from(path.base_name_start())
631 // Could only panic for paths over 64 KiB
632 .expect("dirstate-v2 path length overflow")
633 .into(),
634 descendants_with_entry_count: node
635 .descendants_with_entry_count
636 .into(),
637 tracked_descendants_count: node
638 .tracked_descendants_count
639 .into(),
640 state,
641 data,
642 }
643 }
644 NodeRef::OnDisk(node) => Node {
645 children,
646 copy_source,
647 full_path,
648 ..*node
649 },
650 })
660 ..*node
661 },
662 })
663 }
664 // … so we can write them contiguously, after writing everything else
665 // they refer to.
666 let start = self.current_offset();
667 let len = u32::try_from(nodes_len)
668 // Could only panic with over 4 billion nodes
669 .expect("dirstate-v2 path length overflow")
670 .into();
671 self.out.extend(on_disk_nodes.as_bytes());
672 Ok(ChildNodes { start, len })
651 673 }
652 // … so we can write them contiguously, after writing everything else they
653 // refer to.
654 let start = current_offset(out);
655 let len = u32::try_from(nodes_len)
656 // Could only panic with over 4 billion nodes
657 .expect("dirstate-v2 path length overflow")
658 .into();
659 out.extend(on_disk_nodes.as_bytes());
660 Ok(ChildNodes { start, len })
661 }
662 674
663 fn current_offset(out: &Vec<u8>) -> Offset {
664 u32::try_from(out.len())
665 // Could only panic for a dirstate file larger than 4 GiB
666 .expect("dirstate-v2 offset overflow")
667 .into()
668 }
675 fn current_offset(&mut self) -> Offset {
676 let mut offset = self.out.len();
677 if self.append {
678 offset += self.dirstate_map.on_disk.len()
679 }
680 u32::try_from(offset)
681 // Could only panic for a dirstate file larger than 4 GiB
682 .expect("dirstate-v2 offset overflow")
683 .into()
684 }
669 685
670 fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice {
671 let start = current_offset(out);
672 let len = u16::try_from(slice.len())
673 // Could only panic for paths over 64 KiB
674 .expect("dirstate-v2 path length overflow")
675 .into();
676 out.extend(slice.as_bytes());
677 PathSlice { start, len }
686 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
687 let start = self.current_offset();
688 let len = u16::try_from(slice.len())
689 // Could only panic for paths over 64 KiB
690 .expect("dirstate-v2 path length overflow")
691 .into();
692 self.out.extend(slice.as_bytes());
693 PathSlice { start, len }
694 }
678 695 }
@@ -340,16 +340,23 b' py_class!(pub class DirstateMap |py| {'
340 340 }
341 341 }
342 342
343 /// Returns new data together with whether that data should be appended to
344 /// the existing data file whose content is at `self.on_disk` (True),
345 /// instead of written to a new data file (False).
343 346 def write_v2(
344 347 &self,
345 now: PyObject
346 ) -> PyResult<PyBytes> {
348 now: PyObject,
349 can_append: bool,
350 ) -> PyResult<PyObject> {
347 351 let now = Timestamp(now.extract(py)?);
348 352
349 353 let mut inner = self.inner(py).borrow_mut();
350 let result = inner.pack_v2(now);
354 let result = inner.pack_v2(now, can_append);
351 355 match result {
352 Ok(packed) => Ok(PyBytes::new(py, &packed)),
356 Ok((packed, append)) => {
357 let packed = PyBytes::new(py, &packed);
358 Ok((packed, append).to_py_object(py).into_object())
359 },
353 360 Err(_) => Err(PyErr::new::<exc::OSError, _>(
354 361 py,
355 362 "Dirstate error".to_string(),
@@ -124,8 +124,12 b' impl DirstateMapMethods for OwningDirsta'
124 124 self.get_mut().pack_v1(parents, now)
125 125 }
126 126
127 fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
128 self.get_mut().pack_v2(now)
127 fn pack_v2(
128 &mut self,
129 now: Timestamp,
130 can_append: bool,
131 ) -> Result<(Vec<u8>, bool), DirstateError> {
132 self.get_mut().pack_v2(now, can_append)
129 133 }
130 134
131 135 fn status<'a>(
General Comments 0
You need to be logged in to leave comments. Login now