##// END OF EJS Templates
rust-index: add support for `_slicechunktodensity`
Raphaël Gomès -
r52111:0112803e default
parent child Browse files
Show More
@@ -20,6 +20,7 b' use crate::{'
20 pub const INDEX_ENTRY_SIZE: usize = 64;
20 pub const INDEX_ENTRY_SIZE: usize = 64;
21 pub const COMPRESSION_MODE_INLINE: u8 = 2;
21 pub const COMPRESSION_MODE_INLINE: u8 = 2;
22
22
23 #[derive(Debug)]
23 pub struct IndexHeader {
24 pub struct IndexHeader {
24 pub(super) header_bytes: [u8; 4],
25 pub(super) header_bytes: [u8; 4],
25 }
26 }
@@ -523,6 +524,13 b' impl Index {'
523 }
524 }
524 }
525 }
525
526
527 fn null_entry(&self) -> IndexEntry {
528 IndexEntry {
529 bytes: &[0; INDEX_ENTRY_SIZE],
530 offset_override: Some(0),
531 }
532 }
533
526 /// Return the head revisions of this index
534 /// Return the head revisions of this index
527 pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> {
535 pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> {
528 self.head_revs_filtered(&HashSet::new())
536 self.head_revs_filtered(&HashSet::new())
@@ -592,12 +600,12 b' impl Index {'
592 } else {
600 } else {
593 UncheckedRevision(current_rev.0 - 1)
601 UncheckedRevision(current_rev.0 - 1)
594 };
602 };
595 if new_rev.0 == NULL_REVISION.0 {
596 break;
597 }
598 current_rev = self.check_revision(new_rev).ok_or_else(|| {
603 current_rev = self.check_revision(new_rev).ok_or_else(|| {
599 HgError::corrupted(format!("Revision {new_rev} out of range"))
604 HgError::corrupted(format!("Revision {new_rev} out of range"))
600 })?;
605 })?;
606 if current_rev.0 == NULL_REVISION.0 {
607 break;
608 }
601 entry = self.get_entry(current_rev).unwrap()
609 entry = self.get_entry(current_rev).unwrap()
602 }
610 }
603
611
@@ -745,6 +753,163 b' impl Index {'
745 .ok_or_else(|| RevlogError::corrupted("test"))?;
753 .ok_or_else(|| RevlogError::corrupted("test"))?;
746 self.is_snapshot_unchecked(rev)
754 self.is_snapshot_unchecked(rev)
747 }
755 }
756
    /// Slice revs to reduce the amount of unrelated data to be read from disk.
    ///
    /// The index is sliced into groups that should be read in one time.
    ///
    /// The initial chunk is sliced until the overall density
    /// (payload/chunks-span ratio) is above `target_density`.
    /// No gap smaller than `min_gap_size` is skipped.
    pub fn slice_chunk_to_density(
        &self,
        revs: &[Revision],
        target_density: f64,
        min_gap_size: usize,
    ) -> Vec<Vec<Revision>> {
        if revs.is_empty() {
            return vec![];
        }
        if revs.len() == 1 {
            return vec![revs.to_owned()];
        }
        // Total on-disk span of `revs`; if it is smaller than the minimum
        // gap we are allowed to skip, slicing cannot help.
        let delta_chain_span = self.segment_span(revs);
        if delta_chain_span < min_gap_size {
            return vec![revs.to_owned()];
        }
        // Pair each revision with its index entry; the null revision gets
        // a synthetic all-zero entry.
        let entries: Vec<_> = revs
            .iter()
            .map(|r| {
                (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
            })
            .collect();

        // Bytes that would currently be read from disk for this chunk.
        let mut read_data = delta_chain_span;
        // Total compressed payload actually needed out of those bytes.
        let chain_payload: u32 =
            entries.iter().map(|(_r, e)| e.compressed_len()).sum();
        let mut density = if delta_chain_span > 0 {
            chain_payload as f64 / delta_chain_span as f64
        } else {
            1.0
        };

        if density >= target_density {
            return vec![revs.to_owned()];
        }

        // Collect the gaps between consecutive stored revisions as
        // (gap size, index in `entries`) pairs; they are sorted ascending
        // below so that the largest gap can be popped first.
        let mut gaps = Vec::new();
        let mut previous_end = None;

        for (i, (_rev, entry)) in entries.iter().enumerate() {
            let start = entry.c_start() as usize;
            let length = entry.compressed_len();

            // Skip empty revisions to form larger holes
            if length == 0 {
                continue;
            }

            if let Some(end) = previous_end {
                let gap_size = start - end;
                // Only consider holes that are large enough
                if gap_size > min_gap_size {
                    gaps.push((gap_size, i));
                }
            }
            previous_end = Some(start + length as usize);
        }
        if gaps.is_empty() {
            return vec![revs.to_owned()];
        }
        // sort the gaps to pop them from largest to small
        gaps.sort_unstable();

        // Collect the indices of the largest holes until
        // the density is acceptable
        let mut selected = vec![];
        while let Some((gap_size, gap_id)) = gaps.pop() {
            if density >= target_density {
                break;
            }
            selected.push(gap_id);

            // Skipping this gap removes its bytes from the data to be
            // read, which raises the density.
            read_data -= gap_size;
            density = if read_data > 0 {
                chain_payload as f64 / read_data as f64
            } else {
                1.0
            };
            if density >= target_density {
                break;
            }
        }
        selected.sort_unstable();
        // Sentinel index so the loop below also emits the final chunk.
        selected.push(revs.len());

        // Cut the revs at collected indices
        let mut previous_idx = 0;
        let mut chunks = vec![];
        for idx in selected {
            let chunk = self.trim_chunk(&entries, previous_idx, idx);
            if !chunk.is_empty() {
                chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
            }
            previous_idx = idx;
        }
        // NOTE(review): since `selected` always ends with `revs.len()`,
        // this trailing chunk is always empty; kept as-is.
        let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
        if !chunk.is_empty() {
            chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
        }

        chunks
    }
869
    /// Get the byte span of a segment of sorted revisions.
    ///
    /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
    /// the `revs` segment. An empty `revs` yields a span of 0.
    ///
    /// panics:
    /// - if `revs` is non-empty but only made of `NULL_REVISION`
    /// - if cannot retrieve entry for the last or first not null element of
    ///   `revs`.
    fn segment_span(&self, revs: &[Revision]) -> usize {
        // Empty segment: no bytes covered (this does *not* panic).
        if revs.is_empty() {
            return 0;
        }
        // End of the span: end offset of the last revision's data.
        let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
        let end = last_entry.c_start() + last_entry.compressed_len() as u64;
        // Start of the span: start offset of the first non-null revision
        // (revision 0 always starts at offset 0).
        let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
        let start = if (*first_rev).0 == 0 {
            0
        } else {
            self.get_entry(*first_rev).unwrap().c_start()
        };
        (end - start) as usize
    }
893
894 /// Returns `&revs[startidx..endidx]` without empty trailing revs
895 fn trim_chunk<'a>(
896 &'a self,
897 revs: &'a [(Revision, IndexEntry)],
898 start: usize,
899 mut end: usize,
900 ) -> &'a [(Revision, IndexEntry)] {
901 // Trim empty revs at the end, except the very first rev of a chain
902 let last_rev = revs[end - 1].0;
903 if last_rev.0 < self.len() as BaseRevision {
904 while end > 1
905 && end > start
906 && revs[end - 1].1.compressed_len() == 0
907 {
908 end -= 1
909 }
910 }
911 &revs[start..end]
912 }
748 }
913 }
749 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
914 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
750 let mut offset: usize = 0;
915 let mut offset: usize = 0;
@@ -807,6 +972,11 b" impl<'a> IndexEntry<'a> {"
807 BigEndian::read_u64(&self.bytes[0..8])
972 BigEndian::read_u64(&self.bytes[0..8])
808 }
973 }
809
974
    /// Same result (except potentially for rev 0) as C `index_get_start()`
    ///
    /// The start offset lives in the upper 48 bits of the first 8 entry
    /// bytes (the low 16 bits hold the flags, see `flags()`).
    /// NOTE(review): this reads the raw entry bytes, so any
    /// `offset_override` is not reflected here — confirm callers expect
    /// that.
    fn c_start(&self) -> u64 {
        self.raw_offset() >> 16
    }
979
810 pub fn flags(&self) -> u16 {
980 pub fn flags(&self) -> u16 {
811 BigEndian::read_u16(&self.bytes[6..=7])
981 BigEndian::read_u16(&self.bytes[6..=7])
812 }
982 }
@@ -357,7 +357,37 b' py_class!(pub class MixedIndex |py| {'
357
357
    /// slice planned chunk read to reach a density threshold
    ///
    /// Runs both the Rust implementation and the C one, asserts that
    /// they agree chunk by chunk, and returns the C result.
    def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
        // args: (revs, targetdensity, mingapsize)
        let rust_res = self.inner_slicechunktodensity(
            py,
            args.get_item(py, 0),
            args.get_item(py, 1).extract(py)?,
            args.get_item(py, 2).extract(py)?
        )?;

        let c_res = self.call_cindex(py, "slicechunktodensity", args, kw)?;
        // Same number of chunks...
        assert_eq!(
            rust_res.len(),
            c_res.len(py)?,
            "chunks differ {:?} {}",
            rust_res, c_res
        );
        for (i, chunk) in rust_res.iter().enumerate() {
            let c_chunk = c_res.get_item(py, i)?;
            // ...each of the same length...
            assert_eq!(
                chunk.len(),
                c_chunk.len(py)?,
                "chunk {} length differ {:?} {}",
                i,
                chunk,
                c_res
            );
            // ...holding identical revision numbers.
            for (j, rev) in chunk.iter().enumerate() {
                let c_chunk: BaseRevision
                    = c_chunk.get_item(py, j)?.extract(py)?;
                assert_eq!(c_chunk, rev.0);
            }
        }
        Ok(c_res)
    }
391 }
362
392
363 /// stats for the index
393 /// stats for the index
@@ -819,6 +849,18 b' impl MixedIndex {'
819 .collect();
849 .collect();
820 Ok(PyList::new(py, &as_vec).into_object())
850 Ok(PyList::new(py, &as_vec).into_object())
821 }
851 }
852
853 fn inner_slicechunktodensity(
854 &self,
855 py: Python,
856 revs: PyObject,
857 target_density: f64,
858 min_gap_size: usize,
859 ) -> PyResult<Vec<Vec<Revision>>> {
860 let index = &mut *self.index(py).borrow_mut();
861 let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
862 Ok(index.slice_chunk_to_density(&revs, target_density, min_gap_size))
863 }
822 }
864 }
823
865
824 fn revlog_error(py: Python) -> PyErr {
866 fn revlog_error(py: Python) -> PyErr {
General Comments 0
You need to be logged in to leave comments. Login now