Show More
@@ -20,6 +20,7 b' use crate::{' | |||||
20 | pub const INDEX_ENTRY_SIZE: usize = 64; |
|
20 | pub const INDEX_ENTRY_SIZE: usize = 64; | |
21 | pub const COMPRESSION_MODE_INLINE: u8 = 2; |
|
21 | pub const COMPRESSION_MODE_INLINE: u8 = 2; | |
22 |
|
22 | |||
|
23 | #[derive(Debug)] | |||
23 | pub struct IndexHeader { |
|
24 | pub struct IndexHeader { | |
24 | pub(super) header_bytes: [u8; 4], |
|
25 | pub(super) header_bytes: [u8; 4], | |
25 | } |
|
26 | } | |
@@ -523,6 +524,13 b' impl Index {' | |||||
523 | } |
|
524 | } | |
524 | } |
|
525 | } | |
525 |
|
526 | |||
|
527 | fn null_entry(&self) -> IndexEntry { | |||
|
528 | IndexEntry { | |||
|
529 | bytes: &[0; INDEX_ENTRY_SIZE], | |||
|
530 | offset_override: Some(0), | |||
|
531 | } | |||
|
532 | } | |||
|
533 | ||||
526 | /// Return the head revisions of this index |
|
534 | /// Return the head revisions of this index | |
527 | pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> { |
|
535 | pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> { | |
528 | self.head_revs_filtered(&HashSet::new()) |
|
536 | self.head_revs_filtered(&HashSet::new()) | |
@@ -592,12 +600,12 b' impl Index {' | |||||
592 | } else { |
|
600 | } else { | |
593 | UncheckedRevision(current_rev.0 - 1) |
|
601 | UncheckedRevision(current_rev.0 - 1) | |
594 | }; |
|
602 | }; | |
595 | if new_rev.0 == NULL_REVISION.0 { |
|
|||
596 | break; |
|
|||
597 | } |
|
|||
598 | current_rev = self.check_revision(new_rev).ok_or_else(|| { |
|
603 | current_rev = self.check_revision(new_rev).ok_or_else(|| { | |
599 | HgError::corrupted(format!("Revision {new_rev} out of range")) |
|
604 | HgError::corrupted(format!("Revision {new_rev} out of range")) | |
600 | })?; |
|
605 | })?; | |
|
606 | if current_rev.0 == NULL_REVISION.0 { | |||
|
607 | break; | |||
|
608 | } | |||
601 | entry = self.get_entry(current_rev).unwrap() |
|
609 | entry = self.get_entry(current_rev).unwrap() | |
602 | } |
|
610 | } | |
603 |
|
611 | |||
@@ -745,6 +753,163 b' impl Index {' | |||||
745 | .ok_or_else(|| RevlogError::corrupted("test"))?; |
|
753 | .ok_or_else(|| RevlogError::corrupted("test"))?; | |
746 | self.is_snapshot_unchecked(rev) |
|
754 | self.is_snapshot_unchecked(rev) | |
747 | } |
|
755 | } | |
|
756 | ||||
|
757 | /// Slice revs to reduce the amount of unrelated data to be read from disk. | |||
|
758 | /// | |||
|
759 | /// The index is sliced into groups that should be read in one time. | |||
|
760 | /// | |||
|
761 | /// The initial chunk is sliced until the overall density | |||
|
762 | /// (payload/chunks-span ratio) is above `target_density`. | |||
|
763 | /// No gap smaller than `min_gap_size` is skipped. | |||
|
764 | pub fn slice_chunk_to_density( | |||
|
765 | &self, | |||
|
766 | revs: &[Revision], | |||
|
767 | target_density: f64, | |||
|
768 | min_gap_size: usize, | |||
|
769 | ) -> Vec<Vec<Revision>> { | |||
|
770 | if revs.is_empty() { | |||
|
771 | return vec![]; | |||
|
772 | } | |||
|
773 | if revs.len() == 1 { | |||
|
774 | return vec![revs.to_owned()]; | |||
|
775 | } | |||
|
776 | let delta_chain_span = self.segment_span(revs); | |||
|
777 | if delta_chain_span < min_gap_size { | |||
|
778 | return vec![revs.to_owned()]; | |||
|
779 | } | |||
|
780 | let entries: Vec<_> = revs | |||
|
781 | .iter() | |||
|
782 | .map(|r| { | |||
|
783 | (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry())) | |||
|
784 | }) | |||
|
785 | .collect(); | |||
|
786 | ||||
|
787 | let mut read_data = delta_chain_span; | |||
|
788 | let chain_payload: u32 = | |||
|
789 | entries.iter().map(|(_r, e)| e.compressed_len()).sum(); | |||
|
790 | let mut density = if delta_chain_span > 0 { | |||
|
791 | chain_payload as f64 / delta_chain_span as f64 | |||
|
792 | } else { | |||
|
793 | 1.0 | |||
|
794 | }; | |||
|
795 | ||||
|
796 | if density >= target_density { | |||
|
797 | return vec![revs.to_owned()]; | |||
|
798 | } | |||
|
799 | ||||
|
800 | // Store the gaps in a heap to have them sorted by decreasing size | |||
|
801 | let mut gaps = Vec::new(); | |||
|
802 | let mut previous_end = None; | |||
|
803 | ||||
|
804 | for (i, (_rev, entry)) in entries.iter().enumerate() { | |||
|
805 | let start = entry.c_start() as usize; | |||
|
806 | let length = entry.compressed_len(); | |||
|
807 | ||||
|
808 | // Skip empty revisions to form larger holes | |||
|
809 | if length == 0 { | |||
|
810 | continue; | |||
|
811 | } | |||
|
812 | ||||
|
813 | if let Some(end) = previous_end { | |||
|
814 | let gap_size = start - end; | |||
|
815 | // Only consider holes that are large enough | |||
|
816 | if gap_size > min_gap_size { | |||
|
817 | gaps.push((gap_size, i)); | |||
|
818 | } | |||
|
819 | } | |||
|
820 | previous_end = Some(start + length as usize); | |||
|
821 | } | |||
|
822 | if gaps.is_empty() { | |||
|
823 | return vec![revs.to_owned()]; | |||
|
824 | } | |||
|
825 | // sort the gaps to pop them from largest to small | |||
|
826 | gaps.sort_unstable(); | |||
|
827 | ||||
|
828 | // Collect the indices of the largest holes until | |||
|
829 | // the density is acceptable | |||
|
830 | let mut selected = vec![]; | |||
|
831 | while let Some((gap_size, gap_id)) = gaps.pop() { | |||
|
832 | if density >= target_density { | |||
|
833 | break; | |||
|
834 | } | |||
|
835 | selected.push(gap_id); | |||
|
836 | ||||
|
837 | // The gap sizes are stored as negatives to be sorted decreasingly | |||
|
838 | // by the heap | |||
|
839 | read_data -= gap_size; | |||
|
840 | density = if read_data > 0 { | |||
|
841 | chain_payload as f64 / read_data as f64 | |||
|
842 | } else { | |||
|
843 | 1.0 | |||
|
844 | }; | |||
|
845 | if density >= target_density { | |||
|
846 | break; | |||
|
847 | } | |||
|
848 | } | |||
|
849 | selected.sort_unstable(); | |||
|
850 | selected.push(revs.len()); | |||
|
851 | ||||
|
852 | // Cut the revs at collected indices | |||
|
853 | let mut previous_idx = 0; | |||
|
854 | let mut chunks = vec![]; | |||
|
855 | for idx in selected { | |||
|
856 | let chunk = self.trim_chunk(&entries, previous_idx, idx); | |||
|
857 | if !chunk.is_empty() { | |||
|
858 | chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect()); | |||
|
859 | } | |||
|
860 | previous_idx = idx; | |||
|
861 | } | |||
|
862 | let chunk = self.trim_chunk(&entries, previous_idx, entries.len()); | |||
|
863 | if !chunk.is_empty() { | |||
|
864 | chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect()); | |||
|
865 | } | |||
|
866 | ||||
|
867 | chunks | |||
|
868 | } | |||
|
869 | ||||
|
870 | /// Get the byte span of a segment of sorted revisions. | |||
|
871 | /// | |||
|
872 | /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of | |||
|
873 | /// the `revs` segment. | |||
|
874 | /// | |||
|
875 | /// panics: | |||
|
876 | /// - if `revs` is empty or only made of `NULL_REVISION` | |||
|
877 | /// - if cannot retrieve entry for the last or first not null element of | |||
|
878 | /// `revs`. | |||
|
879 | fn segment_span(&self, revs: &[Revision]) -> usize { | |||
|
880 | if revs.is_empty() { | |||
|
881 | return 0; | |||
|
882 | } | |||
|
883 | let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap(); | |||
|
884 | let end = last_entry.c_start() + last_entry.compressed_len() as u64; | |||
|
885 | let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap(); | |||
|
886 | let start = if (*first_rev).0 == 0 { | |||
|
887 | 0 | |||
|
888 | } else { | |||
|
889 | self.get_entry(*first_rev).unwrap().c_start() | |||
|
890 | }; | |||
|
891 | (end - start) as usize | |||
|
892 | } | |||
|
893 | ||||
|
894 | /// Returns `&revs[startidx..endidx]` without empty trailing revs | |||
|
895 | fn trim_chunk<'a>( | |||
|
896 | &'a self, | |||
|
897 | revs: &'a [(Revision, IndexEntry)], | |||
|
898 | start: usize, | |||
|
899 | mut end: usize, | |||
|
900 | ) -> &'a [(Revision, IndexEntry)] { | |||
|
901 | // Trim empty revs at the end, except the very first rev of a chain | |||
|
902 | let last_rev = revs[end - 1].0; | |||
|
903 | if last_rev.0 < self.len() as BaseRevision { | |||
|
904 | while end > 1 | |||
|
905 | && end > start | |||
|
906 | && revs[end - 1].1.compressed_len() == 0 | |||
|
907 | { | |||
|
908 | end -= 1 | |||
|
909 | } | |||
|
910 | } | |||
|
911 | &revs[start..end] | |||
|
912 | } | |||
748 | } |
|
913 | } | |
749 | fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) { |
|
914 | fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) { | |
750 | let mut offset: usize = 0; |
|
915 | let mut offset: usize = 0; | |
@@ -807,6 +972,11 b" impl<'a> IndexEntry<'a> {" | |||||
807 | BigEndian::read_u64(&self.bytes[0..8]) |
|
972 | BigEndian::read_u64(&self.bytes[0..8]) | |
808 | } |
|
973 | } | |
809 |
|
974 | |||
|
975 | /// Same result (except potentially for rev 0) as C `index_get_start()` | |||
|
976 | fn c_start(&self) -> u64 { | |||
|
977 | self.raw_offset() >> 16 | |||
|
978 | } | |||
|
979 | ||||
810 | pub fn flags(&self) -> u16 { |
|
980 | pub fn flags(&self) -> u16 { | |
811 | BigEndian::read_u16(&self.bytes[6..=7]) |
|
981 | BigEndian::read_u16(&self.bytes[6..=7]) | |
812 | } |
|
982 | } |
@@ -357,7 +357,37 b' py_class!(pub class MixedIndex |py| {' | |||||
357 |
|
357 | |||
358 | /// slice planned chunk read to reach a density threshold |
|
358 | /// slice planned chunk read to reach a density threshold | |
359 | def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> { |
|
359 | def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> { | |
360 |
self. |
|
360 | let rust_res = self.inner_slicechunktodensity( | |
|
361 | py, | |||
|
362 | args.get_item(py, 0), | |||
|
363 | args.get_item(py, 1).extract(py)?, | |||
|
364 | args.get_item(py, 2).extract(py)? | |||
|
365 | )?; | |||
|
366 | ||||
|
367 | let c_res = self.call_cindex(py, "slicechunktodensity", args, kw)?; | |||
|
368 | assert_eq!( | |||
|
369 | rust_res.len(), | |||
|
370 | c_res.len(py)?, | |||
|
371 | "chunks differ {:?} {}", | |||
|
372 | rust_res, c_res | |||
|
373 | ); | |||
|
374 | for (i, chunk) in rust_res.iter().enumerate() { | |||
|
375 | let c_chunk = c_res.get_item(py, i)?; | |||
|
376 | assert_eq!( | |||
|
377 | chunk.len(), | |||
|
378 | c_chunk.len(py)?, | |||
|
379 | "chunk {} length differ {:?} {}", | |||
|
380 | i, | |||
|
381 | chunk, | |||
|
382 | c_res | |||
|
383 | ); | |||
|
384 | for (j, rev) in chunk.iter().enumerate() { | |||
|
385 | let c_chunk: BaseRevision | |||
|
386 | = c_chunk.get_item(py, j)?.extract(py)?; | |||
|
387 | assert_eq!(c_chunk, rev.0); | |||
|
388 | } | |||
|
389 | } | |||
|
390 | Ok(c_res) | |||
361 | } |
|
391 | } | |
362 |
|
392 | |||
363 | /// stats for the index |
|
393 | /// stats for the index | |
@@ -819,6 +849,18 b' impl MixedIndex {' | |||||
819 | .collect(); |
|
849 | .collect(); | |
820 | Ok(PyList::new(py, &as_vec).into_object()) |
|
850 | Ok(PyList::new(py, &as_vec).into_object()) | |
821 | } |
|
851 | } | |
|
852 | ||||
|
853 | fn inner_slicechunktodensity( | |||
|
854 | &self, | |||
|
855 | py: Python, | |||
|
856 | revs: PyObject, | |||
|
857 | target_density: f64, | |||
|
858 | min_gap_size: usize, | |||
|
859 | ) -> PyResult<Vec<Vec<Revision>>> { | |||
|
860 | let index = &mut *self.index(py).borrow_mut(); | |||
|
861 | let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?; | |||
|
862 | Ok(index.slice_chunk_to_density(&revs, target_density, min_gap_size)) | |||
|
863 | } | |||
822 | } |
|
864 | } | |
823 |
|
865 | |||
824 | fn revlog_error(py: Python) -> PyErr { |
|
866 | fn revlog_error(py: Python) -> PyErr { |
General Comments 0
You need to be logged in to leave comments.
Login now