##// END OF EJS Templates
dirstate-v2: Support appending to the same data file...
Simon Sapin -
r48478:065e6162 default
parent child Browse files
Show More
@@ -655,13 +655,41 b' if rustmod is not None:'
655 return self._rustmap
655 return self._rustmap
656
656
657 def write(self, tr, st, now):
657 def write(self, tr, st, now):
658 if self._use_dirstate_v2:
658 if not self._use_dirstate_v2:
659 packed = self._rustmap.write_v2(now)
659 p1, p2 = self.parents()
660 packed = self._rustmap.write_v1(p1, p2, now)
661 st.write(packed)
662 st.close()
663 self._dirtyparents = False
664 return
665
666 # We can only append to an existing data file if there is one
667 can_append = self.docket.uuid is not None
668 packed, append = self._rustmap.write_v2(now, can_append)
669 if append:
670 docket = self.docket
671 data_filename = docket.data_filename()
672 if tr:
673 tr.add(data_filename, docket.data_size)
674 with self._opener(data_filename, b'r+b') as fp:
675 fp.seek(docket.data_size)
676 assert fp.tell() == docket.data_size
677 written = fp.write(packed)
678 if written is not None: # py2 may return None
679 assert written == len(packed), (written, len(packed))
680 docket.data_size += len(packed)
681 docket.parents = self.parents()
682 st.write(docket.serialize())
683 st.close()
684 else:
660 old_docket = self.docket
685 old_docket = self.docket
661 new_docket = docketmod.DirstateDocket.with_new_uuid(
686 new_docket = docketmod.DirstateDocket.with_new_uuid(
662 self.parents(), len(packed)
687 self.parents(), len(packed)
663 )
688 )
664 self._opener.write(new_docket.data_filename(), packed)
689 data_filename = new_docket.data_filename()
690 if tr:
691 tr.add(data_filename, 0)
692 self._opener.write(data_filename, packed)
665 # Write the new docket after the new data file has been
693 # Write the new docket after the new data file has been
666 # written. Because `st` was opened with `atomictemp=True`,
694 # written. Because `st` was opened with `atomictemp=True`,
667 # the actual `.hg/dirstate` file is only affected on close.
695 # the actual `.hg/dirstate` file is only affected on close.
@@ -670,13 +698,16 b' if rustmod is not None:'
670 # Remove the old data file after the new docket pointing to
698 # Remove the old data file after the new docket pointing to
671 # the new data file was written.
699 # the new data file was written.
672 if old_docket.uuid:
700 if old_docket.uuid:
673 self._opener.unlink(old_docket.data_filename())
701 data_filename = old_docket.data_filename()
702 unlink = lambda _tr=None: self._opener.unlink(data_filename)
703 if tr:
704 category = b"dirstate-v2-clean-" + old_docket.uuid
705 tr.addpostclose(category, unlink)
706 else:
707 unlink()
674 self._docket = new_docket
708 self._docket = new_docket
675 else:
709 # Reload from the newly-written file
676 p1, p2 = self.parents()
710 util.clearcachedproperty(self, b"_rustmap")
677 packed = self._rustmap.write_v1(p1, p2, now)
678 st.write(packed)
679 st.close()
680 self._dirtyparents = False
711 self._dirtyparents = False
681
712
682 @propertycache
713 @propertycache
@@ -468,6 +468,24 b" impl<'on_disk> DirstateMap<'on_disk> {"
468 Ok((map, parents))
468 Ok((map, parents))
469 }
469 }
470
470
471 /// Assuming dirstate-v2 format, returns whether the next write should
472 /// append to the existing data file that contains `self.on_disk` (true),
473 /// or create a new data file from scratch (false).
474 pub(super) fn write_should_append(&self) -> bool {
475 // Soon this will be a heuristic based on the amount of unreachable
476 // data. For now it’s pseudo-random in order to make tests exercise
477 // both code paths.
478
479 fn bad_rng() -> u32 {
480 std::time::SystemTime::now()
481 .duration_since(std::time::UNIX_EPOCH)
482 .unwrap()
483 .subsec_millis()
484 }
485
486 bad_rng() % 2 == 0
487 }
488
471 fn get_node<'tree>(
489 fn get_node<'tree>(
472 &'tree self,
490 &'tree self,
473 path: &HgPath,
491 path: &HgPath,
@@ -1043,8 +1061,15 b" impl<'on_disk> super::dispatch::Dirstate"
1043 Ok(packed)
1061 Ok(packed)
1044 }
1062 }
1045
1063
1064 /// Returns new data together with whether that data should be appended to
1065 /// the existing data file whose content is at `self.on_disk` (true),
1066 /// instead of written to a new data file (false).
1046 #[timed]
1067 #[timed]
1047 fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
1068 fn pack_v2(
1069 &mut self,
1070 now: Timestamp,
1071 can_append: bool,
1072 ) -> Result<(Vec<u8>, bool), DirstateError> {
1048 // TODO: how do we want to handle this in 2038?
1073 // TODO: how do we want to handle this in 2038?
1049 let now: i32 = now.0.try_into().expect("time overflow");
1074 let now: i32 = now.0.try_into().expect("time overflow");
1050 let mut paths = Vec::new();
1075 let mut paths = Vec::new();
@@ -1063,7 +1088,7 b" impl<'on_disk> super::dispatch::Dirstate"
1063
1088
1064 self.clear_known_ambiguous_mtimes(&paths)?;
1089 self.clear_known_ambiguous_mtimes(&paths)?;
1065
1090
1066 on_disk::write(self)
1091 on_disk::write(self, can_append)
1067 }
1092 }
1068
1093
1069 fn status<'a>(
1094 fn status<'a>(
@@ -179,11 +179,19 b' pub trait DirstateMapMethods {'
179
179
180 /// Clear mtimes that are ambiguous with `now` (similar to
180 /// Clear mtimes that are ambiguous with `now` (similar to
181 /// `clear_ambiguous_times` but for all files in the dirstate map), and
181 /// `clear_ambiguous_times` but for all files in the dirstate map), and
182 /// serialize bytes to write the `.hg/dirstate` file to disk in dirstate-v2
182 /// serialize bytes to write a dirstate data file to disk in dirstate-v2
183 /// format.
183 /// format.
184 ///
184 ///
185 /// Returns new data together with whether that data should be appended to
186 /// the existing data file whose content is at `self.on_disk` (true),
187 /// instead of written to a new data file (false).
188 ///
185 /// Note: this is only supported by the tree dirstate map.
189 /// Note: this is only supported by the tree dirstate map.
186 fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError>;
190 fn pack_v2(
191 &mut self,
192 now: Timestamp,
193 can_append: bool,
194 ) -> Result<(Vec<u8>, bool), DirstateError>;
187
195
188 /// Run the status algorithm.
196 /// Run the status algorithm.
189 ///
197 ///
@@ -383,7 +391,11 b' impl DirstateMapMethods for DirstateMap '
383 self.pack(parents, now)
391 self.pack(parents, now)
384 }
392 }
385
393
386 fn pack_v2(&mut self, _now: Timestamp) -> Result<Vec<u8>, DirstateError> {
394 fn pack_v2(
395 &mut self,
396 _now: Timestamp,
397 _can_append: bool,
398 ) -> Result<(Vec<u8>, bool), DirstateError> {
387 panic!(
399 panic!(
388 "should have used dirstate_tree::DirstateMap to use the v2 format"
400 "should have used dirstate_tree::DirstateMap to use the v2 format"
389 )
401 )
@@ -544,20 +544,28 b" pub(crate) fn for_each_tracked_path<'on_"
544 recur(on_disk, root.root_nodes, &mut f)
544 recur(on_disk, root.root_nodes, &mut f)
545 }
545 }
546
546
547 /// Returns new data together with whether that data should be appended to the
548 /// existing data file whose content is at `dirstate_map.on_disk` (true),
549 /// instead of written to a new data file (false).
547 pub(super) fn write(
550 pub(super) fn write(
548 dirstate_map: &mut DirstateMap,
551 dirstate_map: &mut DirstateMap,
549 ) -> Result<Vec<u8>, DirstateError> {
552 can_append: bool,
550 let root_len = std::mem::size_of::<Root>();
553 ) -> Result<(Vec<u8>, bool), DirstateError> {
554 let append = can_append && dirstate_map.write_should_append();
551
555
552 // This ignores the space for paths, and for nodes without an entry.
556 // This ignores the space for paths, and for nodes without an entry.
553 // TODO: better estimate? Skip the `Vec` and write to a file directly?
557 // TODO: better estimate? Skip the `Vec` and write to a file directly?
554 let size_guess = root_len
558 let size_guess = std::mem::size_of::<Root>()
555 + std::mem::size_of::<Node>()
559 + std::mem::size_of::<Node>()
556 * dirstate_map.nodes_with_entry_count as usize;
560 * dirstate_map.nodes_with_entry_count as usize;
557 let mut out = Vec::with_capacity(size_guess);
558
561
559 let root_nodes =
562 let mut writer = Writer {
560 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
563 dirstate_map,
564 append,
565 out: Vec::with_capacity(size_guess),
566 };
567
568 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
561
569
562 let root = Root {
570 let root = Root {
563 root_nodes,
571 root_nodes,
@@ -567,112 +575,121 b' pub(super) fn write('
567 .into(),
575 .into(),
568 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
576 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
569 };
577 };
570 out.extend(root.as_bytes());
578 writer.out.extend(root.as_bytes());
571 Ok(out)
579 Ok((writer.out, append))
580 }
581
582 struct Writer<'dmap, 'on_disk> {
583 dirstate_map: &'dmap DirstateMap<'on_disk>,
584 append: bool,
585 out: Vec<u8>,
572 }
586 }
573
587
574 fn write_nodes(
588 impl Writer<'_, '_> {
575 dirstate_map: &DirstateMap,
589 fn write_nodes(
576 nodes: dirstate_map::ChildNodesRef,
590 &mut self,
577 out: &mut Vec<u8>,
591 nodes: dirstate_map::ChildNodesRef,
578 ) -> Result<ChildNodes, DirstateError> {
592 ) -> Result<ChildNodes, DirstateError> {
579 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
593 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
580 // order. Sort to enable binary search in the written file.
594 // order. Sort to enable binary search in the written file.
581 let nodes = nodes.sorted();
595 let nodes = nodes.sorted();
582 let nodes_len = nodes.len();
596 let nodes_len = nodes.len();
583
597
584 // First accumulate serialized nodes in a `Vec`
598 // First accumulate serialized nodes in a `Vec`
585 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
599 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
586 for node in nodes {
600 for node in nodes {
587 let children = write_nodes(
601 let children =
588 dirstate_map,
602 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
589 node.children(dirstate_map.on_disk)?,
603 let full_path = node.full_path(self.dirstate_map.on_disk)?;
590 out,
604 let full_path = self.write_path(full_path.as_bytes());
591 )?;
605 let copy_source = if let Some(source) =
592 let full_path = node.full_path(dirstate_map.on_disk)?;
606 node.copy_source(self.dirstate_map.on_disk)?
593 let full_path = write_path(full_path.as_bytes(), out);
607 {
594 let copy_source =
608 self.write_path(source.as_bytes())
595 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
596 write_path(source.as_bytes(), out)
597 } else {
609 } else {
598 PathSlice {
610 PathSlice {
599 start: 0.into(),
611 start: 0.into(),
600 len: 0.into(),
612 len: 0.into(),
601 }
613 }
602 };
614 };
603 on_disk_nodes.push(match node {
615 on_disk_nodes.push(match node {
604 NodeRef::InMemory(path, node) => {
616 NodeRef::InMemory(path, node) => {
605 let (state, data) = match &node.data {
617 let (state, data) = match &node.data {
606 dirstate_map::NodeData::Entry(entry) => (
618 dirstate_map::NodeData::Entry(entry) => (
607 entry.state.into(),
619 entry.state.into(),
608 Entry {
620 Entry {
609 mode: entry.mode.into(),
621 mode: entry.mode.into(),
610 mtime: entry.mtime.into(),
622 mtime: entry.mtime.into(),
611 size: entry.size.into(),
623 size: entry.size.into(),
612 },
624 },
613 ),
625 ),
614 dirstate_map::NodeData::CachedDirectory { mtime } => {
626 dirstate_map::NodeData::CachedDirectory { mtime } => {
615 (b'd', Entry::from_timestamp(*mtime))
627 (b'd', Entry::from_timestamp(*mtime))
628 }
629 dirstate_map::NodeData::None => (
630 b'\0',
631 Entry {
632 mode: 0.into(),
633 mtime: 0.into(),
634 size: 0.into(),
635 },
636 ),
637 };
638 Node {
639 children,
640 copy_source,
641 full_path,
642 base_name_start: u16::try_from(path.base_name_start())
643 // Could only panic for paths over 64 KiB
644 .expect("dirstate-v2 path length overflow")
645 .into(),
646 descendants_with_entry_count: node
647 .descendants_with_entry_count
648 .into(),
649 tracked_descendants_count: node
650 .tracked_descendants_count
651 .into(),
652 state,
653 data,
616 }
654 }
617 dirstate_map::NodeData::None => (
655 }
618 b'\0',
656 NodeRef::OnDisk(node) => Node {
619 Entry {
620 mode: 0.into(),
621 mtime: 0.into(),
622 size: 0.into(),
623 },
624 ),
625 };
626 Node {
627 children,
657 children,
628 copy_source,
658 copy_source,
629 full_path,
659 full_path,
630 base_name_start: u16::try_from(path.base_name_start())
660 ..*node
631 // Could only panic for paths over 64 KiB
661 },
632 .expect("dirstate-v2 path length overflow")
662 })
633 .into(),
663 }
634 descendants_with_entry_count: node
664 // … so we can write them contiguously, after writing everything else
635 .descendants_with_entry_count
665 // they refer to.
636 .into(),
666 let start = self.current_offset();
637 tracked_descendants_count: node
667 let len = u32::try_from(nodes_len)
638 .tracked_descendants_count
668 // Could only panic with over 4 billion nodes
639 .into(),
669 .expect("dirstate-v2 path length overflow")
640 state,
670 .into();
641 data,
671 self.out.extend(on_disk_nodes.as_bytes());
642 }
672 Ok(ChildNodes { start, len })
643 }
644 NodeRef::OnDisk(node) => Node {
645 children,
646 copy_source,
647 full_path,
648 ..*node
649 },
650 })
651 }
673 }
652 // … so we can write them contiguously, after writing everything else they
653 // refer to.
654 let start = current_offset(out);
655 let len = u32::try_from(nodes_len)
656 // Could only panic with over 4 billion nodes
657 .expect("dirstate-v2 path length overflow")
658 .into();
659 out.extend(on_disk_nodes.as_bytes());
660 Ok(ChildNodes { start, len })
661 }
662
674
663 fn current_offset(out: &Vec<u8>) -> Offset {
675 fn current_offset(&mut self) -> Offset {
664 u32::try_from(out.len())
676 let mut offset = self.out.len();
665 // Could only panic for a dirstate file larger than 4 GiB
677 if self.append {
666 .expect("dirstate-v2 offset overflow")
678 offset += self.dirstate_map.on_disk.len()
667 .into()
679 }
668 }
680 u32::try_from(offset)
681 // Could only panic for a dirstate file larger than 4 GiB
682 .expect("dirstate-v2 offset overflow")
683 .into()
684 }
669
685
670 fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice {
686 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
671 let start = current_offset(out);
687 let start = self.current_offset();
672 let len = u16::try_from(slice.len())
688 let len = u16::try_from(slice.len())
673 // Could only panic for paths over 64 KiB
689 // Could only panic for paths over 64 KiB
674 .expect("dirstate-v2 path length overflow")
690 .expect("dirstate-v2 path length overflow")
675 .into();
691 .into();
676 out.extend(slice.as_bytes());
692 self.out.extend(slice.as_bytes());
677 PathSlice { start, len }
693 PathSlice { start, len }
694 }
678 }
695 }
@@ -340,16 +340,23 b' py_class!(pub class DirstateMap |py| {'
340 }
340 }
341 }
341 }
342
342
343 /// Returns new data together with whether that data should be appended to
344 /// the existing data file whose content is at `self.on_disk` (True),
345 /// instead of written to a new data file (False).
343 def write_v2(
346 def write_v2(
344 &self,
347 &self,
345 now: PyObject
348 now: PyObject,
346 ) -> PyResult<PyBytes> {
349 can_append: bool,
350 ) -> PyResult<PyObject> {
347 let now = Timestamp(now.extract(py)?);
351 let now = Timestamp(now.extract(py)?);
348
352
349 let mut inner = self.inner(py).borrow_mut();
353 let mut inner = self.inner(py).borrow_mut();
350 let result = inner.pack_v2(now);
354 let result = inner.pack_v2(now, can_append);
351 match result {
355 match result {
352 Ok(packed) => Ok(PyBytes::new(py, &packed)),
356 Ok((packed, append)) => {
357 let packed = PyBytes::new(py, &packed);
358 Ok((packed, append).to_py_object(py).into_object())
359 },
353 Err(_) => Err(PyErr::new::<exc::OSError, _>(
360 Err(_) => Err(PyErr::new::<exc::OSError, _>(
354 py,
361 py,
355 "Dirstate error".to_string(),
362 "Dirstate error".to_string(),
@@ -124,8 +124,12 b' impl DirstateMapMethods for OwningDirsta'
124 self.get_mut().pack_v1(parents, now)
124 self.get_mut().pack_v1(parents, now)
125 }
125 }
126
126
127 fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
127 fn pack_v2(
128 self.get_mut().pack_v2(now)
128 &mut self,
129 now: Timestamp,
130 can_append: bool,
131 ) -> Result<(Vec<u8>, bool), DirstateError> {
132 self.get_mut().pack_v2(now, can_append)
129 }
133 }
130
134
131 fn status<'a>(
135 fn status<'a>(
General Comments 0
You need to be logged in to leave comments. Login now