upstream/mercurial-mirror Commit - r52111:0112803e

rust-index: add support for `_slicechunktodensity`

Raphaël Gomès -

r52111:0112803e default

parent child

rust/hg-core/src/revlog/index.rs

0 +173 -3

@@ -20,6 +20,7 b' use crate::{'
20	pub const INDEX_ENTRY_SIZE: usize = 64;	20	pub const INDEX_ENTRY_SIZE: usize = 64;
21	pub const COMPRESSION_MODE_INLINE: u8 = 2;	21	pub const COMPRESSION_MODE_INLINE: u8 = 2;
22		22
		23	#[derive(Debug)]
23	pub struct IndexHeader {	24	pub struct IndexHeader {
24	pub(super) header_bytes: [u8; 4],	25	pub(super) header_bytes: [u8; 4],
25	}	26	}
@@ -523,6 +524,13 b' impl Index {'
523	}	524	}
524	}	525	}
525		526
		527	fn null_entry(&self) -> IndexEntry {
		528	IndexEntry {
		529	bytes: &[0; INDEX_ENTRY_SIZE],
		530	offset_override: Some(0),
		531	}
		532	}
		533
526	/// Return the head revisions of this index	534	/// Return the head revisions of this index
527	pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> {	535	pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> {
528	self.head_revs_filtered(&HashSet::new())	536	self.head_revs_filtered(&HashSet::new())
@@ -592,12 +600,12 b' impl Index {'
592	} else {	600	} else {
593	UncheckedRevision(current_rev.0 - 1)	601	UncheckedRevision(current_rev.0 - 1)
594	};	602	};
595	if new_rev.0 == NULL_REVISION.0 {
596	break;
597	}
598	current_rev = self.check_revision(new_rev).ok_or_else(\|\| {	603	current_rev = self.check_revision(new_rev).ok_or_else(\|\| {
599	HgError::corrupted(format!("Revision {new_rev} out of range"))	604	HgError::corrupted(format!("Revision {new_rev} out of range"))
600	})?;	605	})?;
		606	if current_rev.0 == NULL_REVISION.0 {
		607	break;
		608	}
601	entry = self.get_entry(current_rev).unwrap()	609	entry = self.get_entry(current_rev).unwrap()
602	}	610	}
603		611
@@ -745,6 +753,163 b' impl Index {'
745	.ok_or_else(\|\| RevlogError::corrupted("test"))?;	753	.ok_or_else(\|\| RevlogError::corrupted("test"))?;
746	self.is_snapshot_unchecked(rev)	754	self.is_snapshot_unchecked(rev)
747	}	755	}
		756
		757	/// Slice revs to reduce the amount of unrelated data to be read from disk.
		758	///
		759	/// The index is sliced into groups that should be read in one time.
		760	///
		761	/// The initial chunk is sliced until the overall density
		762	/// (payload/chunks-span ratio) is above `target_density`.
		763	/// No gap smaller than `min_gap_size` is skipped.
		764	pub fn slice_chunk_to_density(
		765	&self,
		766	revs: &[Revision],
		767	target_density: f64,
		768	min_gap_size: usize,
		769	) -> Vec<Vec<Revision>> {
		770	if revs.is_empty() {
		771	return vec![];
		772	}
		773	if revs.len() == 1 {
		774	return vec![revs.to_owned()];
		775	}
		776	let delta_chain_span = self.segment_span(revs);
		777	if delta_chain_span < min_gap_size {
		778	return vec![revs.to_owned()];
		779	}
		780	let entries: Vec<_> = revs
		781	.iter()
		782	.map(\|r\| {
		783	(r, self.get_entry(r).unwrap_or_else(\|\| self.null_entry()))
		784	})
		785	.collect();
		786
		787	let mut read_data = delta_chain_span;
		788	let chain_payload: u32 =
		789	entries.iter().map(\|(_r, e)\| e.compressed_len()).sum();
		790	let mut density = if delta_chain_span > 0 {
		791	chain_payload as f64 / delta_chain_span as f64
		792	} else {
		793	1.0
		794	};
		795
		796	if density >= target_density {
		797	return vec![revs.to_owned()];
		798	}
		799
		800	// Store the gaps in a heap to have them sorted by decreasing size
		801	let mut gaps = Vec::new();
		802	let mut previous_end = None;
		803
		804	for (i, (_rev, entry)) in entries.iter().enumerate() {
		805	let start = entry.c_start() as usize;
		806	let length = entry.compressed_len();
		807
		808	// Skip empty revisions to form larger holes
		809	if length == 0 {
		810	continue;
		811	}
		812
		813	if let Some(end) = previous_end {
		814	let gap_size = start - end;
		815	// Only consider holes that are large enough
		816	if gap_size > min_gap_size {
		817	gaps.push((gap_size, i));
		818	}
		819	}
		820	previous_end = Some(start + length as usize);
		821	}
		822	if gaps.is_empty() {
		823	return vec![revs.to_owned()];
		824	}
		825	// sort the gaps to pop them from largest to small
		826	gaps.sort_unstable();
		827
		828	// Collect the indices of the largest holes until
		829	// the density is acceptable
		830	let mut selected = vec![];
		831	while let Some((gap_size, gap_id)) = gaps.pop() {
		832	if density >= target_density {
		833	break;
		834	}
		835	selected.push(gap_id);
		836
		837	// The gap sizes are stored as negatives to be sorted decreasingly
		838	// by the heap
		839	read_data -= gap_size;
		840	density = if read_data > 0 {
		841	chain_payload as f64 / read_data as f64
		842	} else {
		843	1.0
		844	};
		845	if density >= target_density {
		846	break;
		847	}
		848	}
		849	selected.sort_unstable();
		850	selected.push(revs.len());
		851
		852	// Cut the revs at collected indices
		853	let mut previous_idx = 0;
		854	let mut chunks = vec![];
		855	for idx in selected {
		856	let chunk = self.trim_chunk(&entries, previous_idx, idx);
		857	if !chunk.is_empty() {
		858	chunks.push(chunk.iter().map(\|(rev, _entry)\| *rev).collect());
		859	}
		860	previous_idx = idx;
		861	}
		862	let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
		863	if !chunk.is_empty() {
		864	chunks.push(chunk.iter().map(\|(rev, _entry)\| *rev).collect());
		865	}
		866
		867	chunks
		868	}
		869
		870	/// Get the byte span of a segment of sorted revisions.
		871	///
		872	/// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
		873	/// the `revs` segment.
		874	///
		875	/// panics:
		876	/// - if `revs` is empty or only made of `NULL_REVISION`
		877	/// - if cannot retrieve entry for the last or first not null element of
		878	/// `revs`.
		879	fn segment_span(&self, revs: &[Revision]) -> usize {
		880	if revs.is_empty() {
		881	return 0;
		882	}
		883	let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
		884	let end = last_entry.c_start() + last_entry.compressed_len() as u64;
		885	let first_rev = revs.iter().find(\|r\| r.0 != NULL_REVISION.0).unwrap();
		886	let start = if (*first_rev).0 == 0 {
		887	0
		888	} else {
		889	self.get_entry(*first_rev).unwrap().c_start()
		890	};
		891	(end - start) as usize
		892	}
		893
		894	/// Returns `&revs[startidx..endidx]` without empty trailing revs
		895	fn trim_chunk<'a>(
		896	&'a self,
		897	revs: &'a [(Revision, IndexEntry)],
		898	start: usize,
		899	mut end: usize,
		900	) -> &'a [(Revision, IndexEntry)] {
		901	// Trim empty revs at the end, except the very first rev of a chain
		902	let last_rev = revs[end - 1].0;
		903	if last_rev.0 < self.len() as BaseRevision {
		904	while end > 1
		905	&& end > start
		906	&& revs[end - 1].1.compressed_len() == 0
		907	{
		908	end -= 1
		909	}
		910	}
		911	&revs[start..end]
		912	}
748	}	913	}
749	fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {	914	fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
750	let mut offset: usize = 0;	915	let mut offset: usize = 0;
@@ -807,6 +972,11 b" impl<'a> IndexEntry<'a> {"
807	BigEndian::read_u64(&self.bytes[0..8])	972	BigEndian::read_u64(&self.bytes[0..8])
808	}	973	}
809		974
		975	/// Same result (except potentially for rev 0) as C `index_get_start()`
		976	fn c_start(&self) -> u64 {
		977	self.raw_offset() >> 16
		978	}
		979
810	pub fn flags(&self) -> u16 {	980	pub fn flags(&self) -> u16 {
811	BigEndian::read_u16(&self.bytes[6..=7])	981	BigEndian::read_u16(&self.bytes[6..=7])
812	}	982	}

rust/hg-cpython/src/revlog.rs

0 +43 -1

@@ -357,7 +357,37 b' py_class!(pub class MixedIndex \|py\| {'
357		357
358	/// slice planned chunk read to reach a density threshold	358	/// slice planned chunk read to reach a density threshold
359	def slicechunktodensity(&self, args, *kw) -> PyResult<PyObject> {	359	def slicechunktodensity(&self, args, *kw) -> PyResult<PyObject> {
360	self.~~call_cindex~~(py, ~~"slicechunktodensity"~~, ~~args~~, kw)	360	let rust_res = self.inner_slicechunktodensity(
		361	py,
		362	args.get_item(py, 0),
		363	args.get_item(py, 1).extract(py)?,
		364	args.get_item(py, 2).extract(py)?
		365	)?;
		366
		367	let c_res = self.call_cindex(py, "slicechunktodensity", args, kw)?;
		368	assert_eq!(
		369	rust_res.len(),
		370	c_res.len(py)?,
		371	"chunks differ {:?} {}",
		372	rust_res, c_res
		373	);
		374	for (i, chunk) in rust_res.iter().enumerate() {
		375	let c_chunk = c_res.get_item(py, i)?;
		376	assert_eq!(
		377	chunk.len(),
		378	c_chunk.len(py)?,
		379	"chunk {} length differ {:?} {}",
		380	i,
		381	chunk,
		382	c_res
		383	);
		384	for (j, rev) in chunk.iter().enumerate() {
		385	let c_chunk: BaseRevision
		386	= c_chunk.get_item(py, j)?.extract(py)?;
		387	assert_eq!(c_chunk, rev.0);
		388	}
		389	}
		390	Ok(c_res)
361	}	391	}
362		392
363	/// stats for the index	393	/// stats for the index
@@ -819,6 +849,18 b' impl MixedIndex {'
819	.collect();	849	.collect();
820	Ok(PyList::new(py, &as_vec).into_object())	850	Ok(PyList::new(py, &as_vec).into_object())
821	}	851	}
		852
		853	fn inner_slicechunktodensity(
		854	&self,
		855	py: Python,
		856	revs: PyObject,
		857	target_density: f64,
		858	min_gap_size: usize,
		859	) -> PyResult<Vec<Vec<Revision>>> {
		860	let index = &mut *self.index(py).borrow_mut();
		861	let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
		862	Ok(index.slice_chunk_to_density(&revs, target_density, min_gap_size))
		863	}
822	}	864	}
823		865
824	fn revlog_error(py: Python) -> PyErr {	866	fn revlog_error(py: Python) -> PyErr {

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages