@@ -655,13 +655,41 @@ if rustmod is not None:
             return self._rustmap

         def write(self, tr, st, now):
-            if self._use_dirstate_v2:
-                packed = self._rustmap.write_v2(now)
+            if not self._use_dirstate_v2:
+                p1, p2 = self.parents()
+                packed = self._rustmap.write_v1(p1, p2, now)
+                st.write(packed)
+                st.close()
+                self._dirtyparents = False
+                return
+
+            # We can only append to an existing data file if there is one
+            can_append = self.docket.uuid is not None
+            packed, append = self._rustmap.write_v2(now, can_append)
+            if append:
+                docket = self.docket
+                data_filename = docket.data_filename()
+                if tr:
+                    tr.add(data_filename, docket.data_size)
+                with self._opener(data_filename, b'r+b') as fp:
+                    fp.seek(docket.data_size)
+                    assert fp.tell() == docket.data_size
+                    written = fp.write(packed)
+                    if written is not None:  # py2 may return None
+                        assert written == len(packed), (written, len(packed))
+                docket.data_size += len(packed)
+                docket.parents = self.parents()
+                st.write(docket.serialize())
+                st.close()
+            else:
                 old_docket = self.docket
                 new_docket = docketmod.DirstateDocket.with_new_uuid(
                     self.parents(), len(packed)
                 )
-                self._opener.write(new_docket.data_filename(), packed)
+                data_filename = new_docket.data_filename()
+                if tr:
+                    tr.add(data_filename, 0)
+                self._opener.write(data_filename, packed)
                 # Write the new docket after the new data file has been
                 # written. Because `st` was opened with `atomictemp=True`,
                 # the actual `.hg/dirstate` file is only affected on close.
@@ -670,13 +698,16 @@ if rustmod is not None:
                 # Remove the old data file after the new docket pointing to
                 # the new data file was written.
                 if old_docket.uuid:
-                    self._opener.unlink(old_docket.data_filename())
+                    data_filename = old_docket.data_filename()
+                    unlink = lambda _tr=None: self._opener.unlink(data_filename)
+                    if tr:
+                        category = b"dirstate-v2-clean-" + old_docket.uuid
+                        tr.addpostclose(category, unlink)
+                    else:
+                        unlink()
                 self._docket = new_docket
-            else:
-                p1, p2 = self.parents()
-                packed = self._rustmap.write_v1(p1, p2, now)
-                st.write(packed)
-                st.close()
+            # Reload from the newly-written file
+            util.clearcachedproperty(self, b"_rustmap")
             self._dirtyparents = False

         @propertycache
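
To make the two branches of the new `write()` easier to follow in isolation, here is a minimal, self-contained Python sketch of the same control flow. The `Docket` class, the file naming, and the `write_data` helper are hypothetical stand-ins invented for illustration; they are not Mercurial's `docketmod` API or its transaction machinery. The point is only the append-versus-rewrite decision that the hunks above add to `write()`.

    import os
    import uuid


    class Docket(object):
        """Hypothetical stand-in for the dirstate-v2 docket: it records which
        data file currently holds the tree and how many of its bytes are valid."""

        def __init__(self, data_uuid=None, data_size=0):
            self.uuid = data_uuid
            self.data_size = data_size

        def data_filename(self):
            return 'dirstate.%s.d' % self.uuid


    def write_data(docket, packed, append):
        """Append `packed` to the current data file, or write a fresh file and
        return a new docket pointing at it (mirroring the two branches above)."""
        if append and docket.uuid is not None:
            # Existing entries stay valid; the new bytes land at the end.
            with open(docket.data_filename(), 'r+b') as fp:
                fp.seek(docket.data_size)
                fp.write(packed)
            docket.data_size += len(packed)
            return docket
        # Rewrite from scratch under a new name, then drop the old file.
        new_docket = Docket(data_uuid=uuid.uuid4().hex, data_size=len(packed))
        with open(new_docket.data_filename(), 'wb') as fp:
            fp.write(packed)
        if docket.uuid is not None:
            os.unlink(docket.data_filename())
        return new_docket
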
@@ -468,6 +468,24 b" impl<'on_disk> DirstateMap<'on_disk> {" | |||||
468 | Ok((map, parents)) |
|
468 | Ok((map, parents)) | |
469 | } |
|
469 | } | |
470 |
|
470 | |||
|
471 | /// Assuming dirstate-v2 format, returns whether the next write should | |||
|
472 | /// append to the existing data file that contains `self.on_disk` (true), | |||
|
473 | /// or create a new data file from scratch (false). | |||
|
474 | pub(super) fn write_should_append(&self) -> bool { | |||
|
475 | // Soon this will be a heuristic based on the amount of unreachable | |||
|
476 | // data. For now it’s pseudo-random in order to make tests exercise | |||
|
477 | // both code paths. | |||
|
478 | ||||
|
479 | fn bad_rng() -> u32 { | |||
|
480 | std::time::SystemTime::now() | |||
|
481 | .duration_since(std::time::UNIX_EPOCH) | |||
|
482 | .unwrap() | |||
|
483 | .subsec_millis() | |||
|
484 | } | |||
|
485 | ||||
|
486 | bad_rng() % 2 == 0 | |||
|
487 | } | |||
|
488 | ||||
471 | fn get_node<'tree>( |
|
489 | fn get_node<'tree>( | |
472 | &'tree self, |
|
490 | &'tree self, | |
473 | path: &HgPath, |
|
491 | path: &HgPath, | |
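
The comment above says the append/rewrite decision will eventually become a heuristic based on how much unreachable data the file has accumulated; the `bad_rng()` coin flip is only there so that tests exercise both code paths in the meantime. Purely as an illustration of the kind of heuristic the comment hints at (the threshold and names below are made up, not part of this change):

    def should_append(unreachable_bytes, data_file_size, max_waste_ratio=0.5):
        """Hypothetical future policy: keep appending while less than half of
        the data file is unreachable garbage, rewrite from scratch otherwise."""
        if data_file_size == 0:
            return False
        return unreachable_bytes / float(data_file_size) <= max_waste_ratio
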
@@ -1043,8 +1061,15 @@ impl<'on_disk> super::dispatch::Dirstate
         Ok(packed)
     }

+    /// Returns new data together with whether that data should be appended to
+    /// the existing data file whose content is at `self.on_disk` (true),
+    /// instead of written to a new data file (false).
     #[timed]
-    fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
+    fn pack_v2(
+        &mut self,
+        now: Timestamp,
+        can_append: bool,
+    ) -> Result<(Vec<u8>, bool), DirstateError> {
         // TODO: how do we want to handle this in 2038?
         let now: i32 = now.0.try_into().expect("time overflow");
         let mut paths = Vec::new();
@@ -1063,7 +1088,7 @@ impl<'on_disk> super::dispatch::Dirstate

         self.clear_known_ambiguous_mtimes(&paths)?;

-        on_disk::write(self)
+        on_disk::write(self, can_append)
     }

     fn status<'a>(
@@ -179,11 +179,19 @@ pub trait DirstateMapMethods {

     /// Clear mtimes that are ambigous with `now` (similar to
     /// `clear_ambiguous_times` but for all files in the dirstate map), and
-    /// serialize bytes to write
+    /// serialize bytes to write a dirstate data file to disk in dirstate-v2
     /// format.
     ///
+    /// Returns new data together with whether that data should be appended to
+    /// the existing data file whose content is at `self.on_disk` (true),
+    /// instead of written to a new data file (false).
+    ///
     /// Note: this is only supported by the tree dirstate map.
-    fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError>;
+    fn pack_v2(
+        &mut self,
+        now: Timestamp,
+        can_append: bool,
+    ) -> Result<(Vec<u8>, bool), DirstateError>;

     /// Run the status algorithm.
     ///
@@ -383,7 +391,11 @@ impl DirstateMapMethods for DirstateMap
         self.pack(parents, now)
     }

-    fn pack_v2(&mut self, _now: Timestamp) -> Result<Vec<u8>, DirstateError> {
+    fn pack_v2(
+        &mut self,
+        _now: Timestamp,
+        _can_append: bool,
+    ) -> Result<(Vec<u8>, bool), DirstateError> {
         panic!(
             "should have used dirstate_tree::DirstateMap to use the v2 format"
         )
@@ -544,20 +544,28 @@ pub(crate) fn for_each_tracked_path<'on_
     recur(on_disk, root.root_nodes, &mut f)
 }

+/// Returns new data together with whether that data should be appended to the
+/// existing data file whose content is at `dirstate_map.on_disk` (true),
+/// instead of written to a new data file (false).
 pub(super) fn write(
     dirstate_map: &mut DirstateMap,
-) -> Result<Vec<u8>, DirstateError> {
-    let root_len = std::mem::size_of::<Root>();
+    can_append: bool,
+) -> Result<(Vec<u8>, bool), DirstateError> {
+    let append = can_append && dirstate_map.write_should_append();

     // This ignores the space for paths, and for nodes without an entry.
     // TODO: better estimate? Skip the `Vec` and write to a file directly?
-    let size_guess = root_len
+    let size_guess = std::mem::size_of::<Root>()
         + std::mem::size_of::<Node>()
             * dirstate_map.nodes_with_entry_count as usize;
-    let mut out = Vec::with_capacity(size_guess);

-    let root_nodes =
-        write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
+    let mut writer = Writer {
+        dirstate_map,
+        append,
+        out: Vec::with_capacity(size_guess),
+    };
+
+    let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

     let root = Root {
         root_nodes,
@@ -567,112 +575,121 @@ pub(super) fn write(
             .into(),
         ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
     };
-    out.extend(root.as_bytes());
-    Ok(out)
+    writer.out.extend(root.as_bytes());
+    Ok((writer.out, append))
+}
+
+struct Writer<'dmap, 'on_disk> {
+    dirstate_map: &'dmap DirstateMap<'on_disk>,
+    append: bool,
+    out: Vec<u8>,
 }

-fn write_nodes(
-    dirstate_map: &DirstateMap,
-    nodes: dirstate_map::ChildNodesRef,
-    out: &mut Vec<u8>,
+impl Writer<'_, '_> {
+    fn write_nodes(
+        &mut self,
+        nodes: dirstate_map::ChildNodesRef,
     ) -> Result<ChildNodes, DirstateError> {
         // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
         // order. Sort to enable binary search in the written file.
         let nodes = nodes.sorted();
         let nodes_len = nodes.len();

         // First accumulate serialized nodes in a `Vec`
         let mut on_disk_nodes = Vec::with_capacity(nodes_len);
         for node in nodes {
-        let children = write_nodes(
-            dirstate_map,
-            node.children(dirstate_map.on_disk)?,
-            out,
-        )?;
-        let full_path = node.full_path(dirstate_map.on_disk)?;
-        let full_path = write_path(full_path.as_bytes(), out);
-        let copy_source =
-            if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
-                write_path(source.as_bytes(), out)
+            let children =
+                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
+            let full_path = node.full_path(self.dirstate_map.on_disk)?;
+            let full_path = self.write_path(full_path.as_bytes());
+            let copy_source = if let Some(source) =
+                node.copy_source(self.dirstate_map.on_disk)?
+            {
+                self.write_path(source.as_bytes())
             } else {
                 PathSlice {
                     start: 0.into(),
                     len: 0.into(),
                 }
             };
             on_disk_nodes.push(match node {
                 NodeRef::InMemory(path, node) => {
                     let (state, data) = match &node.data {
                         dirstate_map::NodeData::Entry(entry) => (
                             entry.state.into(),
                             Entry {
                                 mode: entry.mode.into(),
                                 mtime: entry.mtime.into(),
                                 size: entry.size.into(),
                             },
                         ),
                         dirstate_map::NodeData::CachedDirectory { mtime } => {
                             (b'd', Entry::from_timestamp(*mtime))
+                        }
+                        dirstate_map::NodeData::None => (
+                            b'\0',
+                            Entry {
+                                mode: 0.into(),
+                                mtime: 0.into(),
+                                size: 0.into(),
+                            },
+                        ),
+                    };
+                    Node {
+                        children,
+                        copy_source,
+                        full_path,
+                        base_name_start: u16::try_from(path.base_name_start())
+                            // Could only panic for paths over 64 KiB
+                            .expect("dirstate-v2 path length overflow")
+                            .into(),
+                        descendants_with_entry_count: node
+                            .descendants_with_entry_count
+                            .into(),
+                        tracked_descendants_count: node
+                            .tracked_descendants_count
+                            .into(),
+                        state,
+                        data,
                     }
-                    dirstate_map::NodeData::None => (
-                        b'\0',
-                        Entry {
-                            mode: 0.into(),
-                            mtime: 0.into(),
-                            size: 0.into(),
-                        },
-                    ),
-                };
-                Node {
+                }
+                NodeRef::OnDisk(node) => Node {
                     children,
                     copy_source,
                     full_path,
-                    base_name_start: u16::try_from(path.base_name_start())
-                        // Could only panic for paths over 64 KiB
-                        .expect("dirstate-v2 path length overflow")
-                        .into(),
-                    descendants_with_entry_count: node
-                        .descendants_with_entry_count
-                        .into(),
-                    tracked_descendants_count: node
-                        .tracked_descendants_count
-                        .into(),
-                    state,
-                    data,
-                }
-            }
-            NodeRef::OnDisk(node) => Node {
-                children,
-                copy_source,
-                full_path,
-                ..*node
-            },
-        })
+                    ..*node
+                },
+            })
+        }
+        // … so we can write them contiguously, after writing everything else
+        // they refer to.
+        let start = self.current_offset();
+        let len = u32::try_from(nodes_len)
+            // Could only panic with over 4 billion nodes
+            .expect("dirstate-v2 path length overflow")
+            .into();
+        self.out.extend(on_disk_nodes.as_bytes());
+        Ok(ChildNodes { start, len })
     }
-    // … so we can write them contiguously, after writing everything else they
-    // refer to.
-    let start = current_offset(out);
-    let len = u32::try_from(nodes_len)
-        // Could only panic with over 4 billion nodes
-        .expect("dirstate-v2 path length overflow")
-        .into();
-    out.extend(on_disk_nodes.as_bytes());
-    Ok(ChildNodes { start, len })
-}

-fn current_offset(out: &Vec<u8>) -> Offset {
-    u32::try_from(out.len())
-        // Could only panic for a dirstate file larger than 4 GiB
-        .expect("dirstate-v2 offset overflow")
-        .into()
-}
+    fn current_offset(&mut self) -> Offset {
+        let mut offset = self.out.len();
+        if self.append {
+            offset += self.dirstate_map.on_disk.len()
+        }
+        u32::try_from(offset)
+            // Could only panic for a dirstate file larger than 4 GiB
+            .expect("dirstate-v2 offset overflow")
+            .into()
+    }

-fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice {
-    let start = current_offset(out);
+    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
+        let start = self.current_offset();
         let len = u16::try_from(slice.len())
             // Could only panic for paths over 64 KiB
             .expect("dirstate-v2 path length overflow")
             .into();
-    out.extend(slice.as_bytes());
-    PathSlice { start, len }
+        self.out.extend(slice.as_bytes());
+        PathSlice { start, len }
+    }
 }
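
The subtle part of the new `Writer` is `current_offset`: offsets recorded in the serialized nodes are absolute positions within the data file, so when appending, the length of the bytes already on disk must be added to the position inside the in-memory output buffer. A tiny Python sketch of that bookkeeping (the names are illustrative, chosen only to mirror the Rust method above):

    def current_offset(out_len, on_disk_len, append):
        """Absolute position in the data file where the next bytes will land.

        When appending, the in-memory buffer is written after the bytes that
        are already on disk, so their length is part of every recorded offset.
        """
        offset = out_len
        if append:
            offset += on_disk_len
        return offset


    # With a 1000-byte existing data file, the first path written in append
    # mode is recorded at offset 1000, not 0:
    assert current_offset(0, 1000, append=True) == 1000
    assert current_offset(0, 1000, append=False) == 0
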
@@ -340,16 +340,23 @@ py_class!(pub class DirstateMap |py| {
         }
     }

+    /// Returns new data together with whether that data should be appended to
+    /// the existing data file whose content is at `self.on_disk` (True),
+    /// instead of written to a new data file (False).
     def write_v2(
         &self,
-        now: PyObject
-    ) -> PyResult<PyBytes> {
+        now: PyObject,
+        can_append: bool,
+    ) -> PyResult<PyObject> {
         let now = Timestamp(now.extract(py)?);

         let mut inner = self.inner(py).borrow_mut();
-        let result = inner.pack_v2(now);
+        let result = inner.pack_v2(now, can_append);
         match result {
-            Ok(packed) => Ok(PyBytes::new(py, &packed)),
+            Ok((packed, append)) => {
+                let packed = PyBytes::new(py, &packed);
+                Ok((packed, append).to_py_object(py).into_object())
+            },
             Err(_) => Err(PyErr::new::<exc::OSError, _>(
                 py,
                 "Dirstate error".to_string(),
@@ -124,8 +124,12 @@ impl DirstateMapMethods for OwningDirsta
         self.get_mut().pack_v1(parents, now)
     }

-    fn pack_v2(&mut self, now: Timestamp) -> Result<Vec<u8>, DirstateError> {
-        self.get_mut().pack_v2(now)
+    fn pack_v2(
+        &mut self,
+        now: Timestamp,
+        can_append: bool,
+    ) -> Result<(Vec<u8>, bool), DirstateError> {
+        self.get_mut().pack_v2(now, can_append)
     }

     fn status<'a>(