Show More
@@ -655,13 +655,41 b' if rustmod is not None:' | |||
|
655 | 655 | return self._rustmap |
|
656 | 656 | |
|
657 | 657 | def write(self, tr, st, now): |
|
658 | if self._use_dirstate_v2: | |
|
659 |
p |
|
|
658 | if not self._use_dirstate_v2: | |
|
659 | p1, p2 = self.parents() | |
|
660 | packed = self._rustmap.write_v1(p1, p2, now) | |
|
661 | st.write(packed) | |
|
662 | st.close() | |
|
663 | self._dirtyparents = False | |
|
664 | return | |
|
665 | ||
|
666 | # We can only append to an existing data file if there is one | |
|
667 | can_append = self.docket.uuid is not None | |
|
668 | packed, append = self._rustmap.write_v2(now, can_append) | |
|
669 | if append: | |
|
670 | docket = self.docket | |
|
671 | data_filename = docket.data_filename() | |
|
672 | if tr: | |
|
673 | tr.add(data_filename, docket.data_size) | |
|
674 | with self._opener(data_filename, b'r+b') as fp: | |
|
675 | fp.seek(docket.data_size) | |
|
676 | assert fp.tell() == docket.data_size | |
|
677 | written = fp.write(packed) | |
|
678 | if written is not None: # py2 may return None | |
|
679 | assert written == len(packed), (written, len(packed)) | |
|
680 | docket.data_size += len(packed) | |
|
681 | docket.parents = self.parents() | |
|
682 | st.write(docket.serialize()) | |
|
683 | st.close() | |
|
684 | else: | |
|
660 | 685 | old_docket = self.docket |
|
661 | 686 | new_docket = docketmod.DirstateDocket.with_new_uuid( |
|
662 | 687 | self.parents(), len(packed) |
|
663 | 688 | ) |
|
664 |
|
|
|
689 | data_filename = new_docket.data_filename() | |
|
690 | if tr: | |
|
691 | tr.add(data_filename, 0) | |
|
692 | self._opener.write(data_filename, packed) | |
|
665 | 693 | # Write the new docket after the new data file has been |
|
666 | 694 | # written. Because `st` was opened with `atomictemp=True`, |
|
667 | 695 | # the actual `.hg/dirstate` file is only affected on close. |
@@ -670,13 +698,16 b' if rustmod is not None:' | |||
|
670 | 698 | # Remove the old data file after the new docket pointing to |
|
671 | 699 | # the new data file was written. |
|
672 | 700 | if old_docket.uuid: |
|
673 |
|
|
|
701 | data_filename = old_docket.data_filename() | |
|
702 | unlink = lambda _tr=None: self._opener.unlink(data_filename) | |
|
703 | if tr: | |
|
704 | category = b"dirstate-v2-clean-" + old_docket.uuid | |
|
705 | tr.addpostclose(category, unlink) | |
|
706 | else: | |
|
707 | unlink() | |
|
674 | 708 | self._docket = new_docket |
|
675 | else: | |
|
676 | p1, p2 = self.parents() | |
|
677 | packed = self._rustmap.write_v1(p1, p2, now) | |
|
678 | st.write(packed) | |
|
679 | st.close() | |
|
709 | # Reload from the newly-written file | |
|
710 | util.clearcachedproperty(self, b"_rustmap") | |
|
680 | 711 | self._dirtyparents = False |
|
681 | 712 | |
|
682 | 713 | @propertycache |
@@ -468,6 +468,24 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||
|
468 | 468 | Ok((map, parents)) |
|
469 | 469 | } |
|
470 | 470 | |
|
471 | /// Assuming dirstate-v2 format, returns whether the next write should | |
|
472 | /// append to the existing data file that contains `self.on_disk` (true), | |
|
473 | /// or create a new data file from scratch (false). | |
|
474 | pub(super) fn write_should_append(&self) -> bool { | |
|
475 | // Soon this will be a heuristic based on the amount of unreachable | |
|
476 | // data. For now it’s pseudo-random in order to make tests exercise | |
|
477 | // both code paths. | |
|
478 | ||
|
479 | fn bad_rng() -> u32 { | |
|
480 | std::time::SystemTime::now() | |
|
481 | .duration_since(std::time::UNIX_EPOCH) | |
|
482 | .unwrap() | |
|
483 | .subsec_millis() | |
|
484 | } | |
|
485 | ||
|
486 | bad_rng() % 2 == 0 | |
|
487 | } | |
|
488 | ||
|
471 | 489 | fn get_node<'tree>( |
|
472 | 490 | &'tree self, |
|
473 | 491 | path: &HgPath, |
@@ -1043,8 +1061,15 b" impl<'on_disk> super::dispatch::Dirstate" | |||
|
1043 | 1061 | Ok(packed) |
|
1044 | 1062 | } |
|
1045 | 1063 | |
|
1064 | /// Returns new data together with whether that data should be appended to | |
|
1065 | /// the existing data file whose content is at `self.on_disk` (true), | |
|
1066 | /// instead of written to a new data file (false). | |
|
1046 | 1067 | #[timed] |
|
1047 | fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> { | |
|
1068 | fn pack_v2( | |
|
1069 | &mut self, | |
|
1070 | now: Timestamp, | |
|
1071 | can_append: bool, | |
|
1072 | ) -> Result<(Vec<u8>, bool), DirstateError> { | |
|
1048 | 1073 | // TODO: how do we want to handle this in 2038? |
|
1049 | 1074 | let now: i32 = now.0.try_into().expect("time overflow"); |
|
1050 | 1075 | let mut paths = Vec::new(); |
@@ -1063,7 +1088,7 b" impl<'on_disk> super::dispatch::Dirstate" | |||
|
1063 | 1088 | |
|
1064 | 1089 | self.clear_known_ambiguous_mtimes(&paths)?; |
|
1065 | 1090 | |
|
1066 | on_disk::write(self) | |
|
1091 | on_disk::write(self, can_append) | |
|
1067 | 1092 | } |
|
1068 | 1093 | |
|
1069 | 1094 | fn status<'a>( |
@@ -179,11 +179,19 b' pub trait DirstateMapMethods {' | |||
|
179 | 179 | |
|
180 | 180 | /// Clear mtimes that are ambiguous with `now` (similar to
|
181 | 181 | /// `clear_ambiguous_times` but for all files in the dirstate map), and |
|
182 |
/// serialize bytes to write |
|
|
182 | /// serialize bytes to write a dirstate data file to disk in dirstate-v2 | |
|
183 | 183 | /// format. |
|
184 | 184 | /// |
|
185 | /// Returns new data together with whether that data should be appended to | |
|
186 | /// the existing data file whose content is at `self.on_disk` (true), | |
|
187 | /// instead of written to a new data file (false). | |
|
188 | /// | |
|
185 | 189 | /// Note: this is only supported by the tree dirstate map. |
|
186 | fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError>; | |
|
190 | fn pack_v2( | |
|
191 | &mut self, | |
|
192 | now: Timestamp, | |
|
193 | can_append: bool, | |
|
194 | ) -> Result<(Vec<u8>, bool), DirstateError>; | |
|
187 | 195 | |
|
188 | 196 | /// Run the status algorithm. |
|
189 | 197 | /// |
@@ -383,7 +391,11 b' impl DirstateMapMethods for DirstateMap ' | |||
|
383 | 391 | self.pack(parents, now) |
|
384 | 392 | } |
|
385 | 393 | |
|
386 | fn pack_v2(&mut self, _now: Timestamp) -> Result<Vec<u8>, DirstateError> { | |
|
394 | fn pack_v2( | |
|
395 | &mut self, | |
|
396 | _now: Timestamp, | |
|
397 | _can_append: bool, | |
|
398 | ) -> Result<(Vec<u8>, bool), DirstateError> { | |
|
387 | 399 | panic!( |
|
388 | 400 | "should have used dirstate_tree::DirstateMap to use the v2 format" |
|
389 | 401 | ) |
@@ -544,20 +544,28 b" pub(crate) fn for_each_tracked_path<'on_" | |||
|
544 | 544 | recur(on_disk, root.root_nodes, &mut f) |
|
545 | 545 | } |
|
546 | 546 | |
|
547 | /// Returns new data together with whether that data should be appended to the | |
|
548 | /// existing data file whose content is at `dirstate_map.on_disk` (true), | |
|
549 | /// instead of written to a new data file (false). | |
|
547 | 550 | pub(super) fn write( |
|
548 | 551 | dirstate_map: &mut DirstateMap, |
|
549 | ) -> Result<Vec<u8>, DirstateError> { | |
|
550 | let root_len = std::mem::size_of::<Root>(); | |
|
552 | can_append: bool, | |
|
553 | ) -> Result<(Vec<u8>, bool), DirstateError> { | |
|
554 | let append = can_append && dirstate_map.write_should_append(); | |
|
551 | 555 | |
|
552 | 556 | // This ignores the space for paths, and for nodes without an entry. |
|
553 | 557 | // TODO: better estimate? Skip the `Vec` and write to a file directly? |
|
554 |
let size_guess = |
|
|
558 | let size_guess = std::mem::size_of::<Root>() | |
|
555 | 559 | + std::mem::size_of::<Node>() |
|
556 | 560 | * dirstate_map.nodes_with_entry_count as usize; |
|
557 | let mut out = Vec::with_capacity(size_guess); | |
|
558 | 561 | |
|
559 | let root_nodes = | |
|
560 | write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?; | |
|
562 | let mut writer = Writer { | |
|
563 | dirstate_map, | |
|
564 | append, | |
|
565 | out: Vec::with_capacity(size_guess), | |
|
566 | }; | |
|
567 | ||
|
568 | let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?; | |
|
561 | 569 | |
|
562 | 570 | let root = Root { |
|
563 | 571 | root_nodes, |
@@ -567,112 +575,121 b' pub(super) fn write(' | |||
|
567 | 575 | .into(), |
|
568 | 576 | ignore_patterns_hash: dirstate_map.ignore_patterns_hash, |
|
569 | 577 | }; |
|
570 | out.extend(root.as_bytes()); | |
|
571 | Ok(out) | |
|
578 | writer.out.extend(root.as_bytes()); | |
|
579 | Ok((writer.out, append)) | |
|
580 | } | |
|
581 | ||
|
582 | struct Writer<'dmap, 'on_disk> { | |
|
583 | dirstate_map: &'dmap DirstateMap<'on_disk>, | |
|
584 | append: bool, | |
|
585 | out: Vec<u8>, | |
|
572 | 586 | } |
|
573 | 587 | |
|
574 | fn write_nodes( | |
|
575 | dirstate_map: &DirstateMap, | |
|
576 | nodes: dirstate_map::ChildNodesRef, | |
|
577 | out: &mut Vec<u8>, | |
|
578 | ) -> Result<ChildNodes, DirstateError> { | |
|
579 | // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration | |
|
580 | // order. Sort to enable binary search in the written file. | |
|
581 | let nodes = nodes.sorted(); | |
|
582 | let nodes_len = nodes.len(); | |
|
588 | impl Writer<'_, '_> { | |
|
589 | fn write_nodes( | |
|
590 | &mut self, | |
|
591 | nodes: dirstate_map::ChildNodesRef, | |
|
592 | ) -> Result<ChildNodes, DirstateError> { | |
|
593 | // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration | |
|
594 | // order. Sort to enable binary search in the written file. | |
|
595 | let nodes = nodes.sorted(); | |
|
596 | let nodes_len = nodes.len(); | |
|
583 | 597 | |
|
584 | // First accumulate serialized nodes in a `Vec` | |
|
585 | let mut on_disk_nodes = Vec::with_capacity(nodes_len); | |
|
586 | for node in nodes { | |
|
587 |
let children = |
|
|
588 | dirstate_map, | |
|
589 |
node. |
|
|
590 | out, | |
|
591 | )?; | |
|
592 |
|
|
|
593 | let full_path = write_path(full_path.as_bytes(), out); | |
|
594 | let copy_source = | |
|
595 | if let Some(source) = node.copy_source(dirstate_map.on_disk)? { | |
|
596 | write_path(source.as_bytes(), out) | |
|
598 | // First accumulate serialized nodes in a `Vec` | |
|
599 | let mut on_disk_nodes = Vec::with_capacity(nodes_len); | |
|
600 | for node in nodes { | |
|
601 | let children = | |
|
602 | self.write_nodes(node.children(self.dirstate_map.on_disk)?)?; | |
|
603 | let full_path = node.full_path(self.dirstate_map.on_disk)?; | |
|
604 | let full_path = self.write_path(full_path.as_bytes()); | |
|
605 | let copy_source = if let Some(source) = | |
|
606 | node.copy_source(self.dirstate_map.on_disk)? | |
|
607 | { | |
|
608 | self.write_path(source.as_bytes()) | |
|
597 | 609 | } else { |
|
598 | 610 | PathSlice { |
|
599 | 611 | start: 0.into(), |
|
600 | 612 | len: 0.into(), |
|
601 | 613 | } |
|
602 | 614 | }; |
|
603 | on_disk_nodes.push(match node { | |
|
604 | NodeRef::InMemory(path, node) => { | |
|
605 | let (state, data) = match &node.data { | |
|
606 | dirstate_map::NodeData::Entry(entry) => ( | |
|
607 | entry.state.into(), | |
|
608 | Entry { | |
|
609 | mode: entry.mode.into(), | |
|
610 | mtime: entry.mtime.into(), | |
|
611 | size: entry.size.into(), | |
|
612 | }, | |
|
613 | ), | |
|
614 | dirstate_map::NodeData::CachedDirectory { mtime } => { | |
|
615 | (b'd', Entry::from_timestamp(*mtime)) | |
|
615 | on_disk_nodes.push(match node { | |
|
616 | NodeRef::InMemory(path, node) => { | |
|
617 | let (state, data) = match &node.data { | |
|
618 | dirstate_map::NodeData::Entry(entry) => ( | |
|
619 | entry.state.into(), | |
|
620 | Entry { | |
|
621 | mode: entry.mode.into(), | |
|
622 | mtime: entry.mtime.into(), | |
|
623 | size: entry.size.into(), | |
|
624 | }, | |
|
625 | ), | |
|
626 | dirstate_map::NodeData::CachedDirectory { mtime } => { | |
|
627 | (b'd', Entry::from_timestamp(*mtime)) | |
|
628 | } | |
|
629 | dirstate_map::NodeData::None => ( | |
|
630 | b'\0', | |
|
631 | Entry { | |
|
632 | mode: 0.into(), | |
|
633 | mtime: 0.into(), | |
|
634 | size: 0.into(), | |
|
635 | }, | |
|
636 | ), | |
|
637 | }; | |
|
638 | Node { | |
|
639 | children, | |
|
640 | copy_source, | |
|
641 | full_path, | |
|
642 | base_name_start: u16::try_from(path.base_name_start()) | |
|
643 | // Could only panic for paths over 64 KiB | |
|
644 | .expect("dirstate-v2 path length overflow") | |
|
645 | .into(), | |
|
646 | descendants_with_entry_count: node | |
|
647 | .descendants_with_entry_count | |
|
648 | .into(), | |
|
649 | tracked_descendants_count: node | |
|
650 | .tracked_descendants_count | |
|
651 | .into(), | |
|
652 | state, | |
|
653 | data, | |
|
616 | 654 | } |
|
617 | dirstate_map::NodeData::None => ( | |
|
618 | b'\0', | |
|
619 | Entry { | |
|
620 | mode: 0.into(), | |
|
621 | mtime: 0.into(), | |
|
622 | size: 0.into(), | |
|
623 | }, | |
|
624 | ), | |
|
625 | }; | |
|
626 | Node { | |
|
655 | } | |
|
656 | NodeRef::OnDisk(node) => Node { | |
|
627 | 657 | children, |
|
628 | 658 | copy_source, |
|
629 | 659 | full_path, |
|
630 | base_name_start: u16::try_from(path.base_name_start()) | |
|
631 | // Could only panic for paths over 64 KiB | |
|
632 | .expect("dirstate-v2 path length overflow") | |
|
633 | .into(), | |
|
634 | descendants_with_entry_count: node | |
|
635 | .descendants_with_entry_count | |
|
636 | .into(), | |
|
637 | tracked_descendants_count: node | |
|
638 | .tracked_descendants_count | |
|
639 | .into(), | |
|
640 | state, | |
|
641 | data, | |
|
642 | } | |
|
643 | } | |
|
644 | NodeRef::OnDisk(node) => Node { | |
|
645 | children, | |
|
646 | copy_source, | |
|
647 | full_path, | |
|
648 | ..*node | |
|
649 | }, | |
|
650 | }) | |
|
660 | ..*node | |
|
661 | }, | |
|
662 | }) | |
|
663 | } | |
|
664 | // … so we can write them contiguously, after writing everything else | |
|
665 | // they refer to. | |
|
666 | let start = self.current_offset(); | |
|
667 | let len = u32::try_from(nodes_len) | |
|
668 | // Could only panic with over 4 billion nodes | |
|
669 | .expect("dirstate-v2 path length overflow") | |
|
670 | .into(); | |
|
671 | self.out.extend(on_disk_nodes.as_bytes()); | |
|
672 | Ok(ChildNodes { start, len }) | |
|
651 | 673 | } |
|
652 | // … so we can write them contiguously, after writing everything else they | |
|
653 | // refer to. | |
|
654 | let start = current_offset(out); | |
|
655 | let len = u32::try_from(nodes_len) | |
|
656 | // Could only panic with over 4 billion nodes | |
|
657 | .expect("dirstate-v2 path length overflow") | |
|
658 | .into(); | |
|
659 | out.extend(on_disk_nodes.as_bytes()); | |
|
660 | Ok(ChildNodes { start, len }) | |
|
661 | } | |
|
662 | 674 | |
|
663 |
fn current_offset( |
|
|
664 | u32::try_from(out.len()) | |
|
665 | // Could only panic for a dirstate file larger than 4 GiB | |
|
666 | .expect("dirstate-v2 offset overflow") | |
|
667 |
|
|
|
668 | } | |
|
675 | fn current_offset(&mut self) -> Offset { | |
|
676 | let mut offset = self.out.len(); | |
|
677 | if self.append { | |
|
678 | offset += self.dirstate_map.on_disk.len() | |
|
679 | } | |
|
680 | u32::try_from(offset) | |
|
681 | // Could only panic for a dirstate file larger than 4 GiB | |
|
682 | .expect("dirstate-v2 offset overflow") | |
|
683 | .into() | |
|
684 | } | |
|
669 | 685 | |
|
670 |
fn write_path(slice: &[u8] |
|
|
671 |
let start = current_offset( |
|
|
672 | let len = u16::try_from(slice.len()) | |
|
673 | // Could only panic for paths over 64 KiB | |
|
674 | .expect("dirstate-v2 path length overflow") | |
|
675 | .into(); | |
|
676 | out.extend(slice.as_bytes()); | |
|
677 | PathSlice { start, len } | |
|
686 | fn write_path(&mut self, slice: &[u8]) -> PathSlice { | |
|
687 | let start = self.current_offset(); | |
|
688 | let len = u16::try_from(slice.len()) | |
|
689 | // Could only panic for paths over 64 KiB | |
|
690 | .expect("dirstate-v2 path length overflow") | |
|
691 | .into(); | |
|
692 | self.out.extend(slice.as_bytes()); | |
|
693 | PathSlice { start, len } | |
|
694 | } | |
|
678 | 695 | } |
@@ -340,16 +340,23 b' py_class!(pub class DirstateMap |py| {' | |||
|
340 | 340 | } |
|
341 | 341 | } |
|
342 | 342 | |
|
343 | /// Returns new data together with whether that data should be appended to | |
|
344 | /// the existing data file whose content is at `self.on_disk` (True), | |
|
345 | /// instead of written to a new data file (False). | |
|
343 | 346 | def write_v2( |
|
344 | 347 | &self, |
|
345 | now: PyObject | |
|
346 | ) -> PyResult<PyBytes> { | |
|
348 | now: PyObject, | |
|
349 | can_append: bool, | |
|
350 | ) -> PyResult<PyObject> { | |
|
347 | 351 | let now = Timestamp(now.extract(py)?); |
|
348 | 352 | |
|
349 | 353 | let mut inner = self.inner(py).borrow_mut(); |
|
350 | let result = inner.pack_v2(now); | |
|
354 | let result = inner.pack_v2(now, can_append); | |
|
351 | 355 | match result { |
|
352 | Ok(packed) => Ok(PyBytes::new(py, &packed)), | |
|
356 | Ok((packed, append)) => { | |
|
357 | let packed = PyBytes::new(py, &packed); | |
|
358 | Ok((packed, append).to_py_object(py).into_object()) | |
|
359 | }, | |
|
353 | 360 | Err(_) => Err(PyErr::new::<exc::OSError, _>( |
|
354 | 361 | py, |
|
355 | 362 | "Dirstate error".to_string(), |
@@ -124,8 +124,12 b' impl DirstateMapMethods for OwningDirsta' | |||
|
124 | 124 | self.get_mut().pack_v1(parents, now) |
|
125 | 125 | } |
|
126 | 126 | |
|
127 | fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> { | |
|
128 | self.get_mut().pack_v2(now) | |
|
127 | fn pack_v2( | |
|
128 | &mut self, | |
|
129 | now: Timestamp, | |
|
130 | can_append: bool, | |
|
131 | ) -> Result<(Vec<u8>, bool), DirstateError> { | |
|
132 | self.get_mut().pack_v2(now, can_append) | |
|
129 | 133 | } |
|
130 | 134 | |
|
131 | 135 | fn status<'a>( |
General Comments 0
You need to be logged in to leave comments.
Login now