|
|
//! A layer of lower-level revlog functionality to encapsulate most of the
|
|
|
//! IO work and expensive operations.
|
|
|
use std::{
|
|
|
borrow::Cow,
|
|
|
cell::RefCell,
|
|
|
io::{ErrorKind, Seek, SeekFrom, Write},
|
|
|
ops::Deref,
|
|
|
path::PathBuf,
|
|
|
sync::{Arc, Mutex},
|
|
|
};
|
|
|
|
|
|
use schnellru::{ByMemoryUsage, LruMap};
|
|
|
use sha1::{Digest, Sha1};
|
|
|
|
|
|
use crate::{
|
|
|
errors::{HgError, IoResultExt},
|
|
|
exit_codes,
|
|
|
transaction::Transaction,
|
|
|
vfs::Vfs,
|
|
|
};
|
|
|
|
|
|
use super::{
|
|
|
compression::{
|
|
|
uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor,
|
|
|
ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE,
|
|
|
},
|
|
|
file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles},
|
|
|
index::{Index, IndexHeader, INDEX_ENTRY_SIZE},
|
|
|
node::{NODE_BYTES_LENGTH, NULL_NODE},
|
|
|
options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig},
|
|
|
BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex,
|
|
|
UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS,
|
|
|
};
|
|
|
|
|
|
/// Matches the `_InnerRevlog` class in the Python code, as an arbitrary
|
|
|
/// boundary to incrementally rewrite higher-level revlog functionality in
|
|
|
/// Rust.
|
|
|
pub struct InnerRevlog {
|
|
|
/// When index and data are not interleaved: bytes of the revlog index.
|
|
|
/// When index and data are interleaved (inline revlog): bytes of the
|
|
|
/// revlog index and data.
|
|
|
pub index: Index,
|
|
|
/// The store vfs that is used to interact with the filesystem
|
|
|
vfs: Box<dyn Vfs>,
|
|
|
/// The index file path, relative to the vfs root
|
|
|
pub index_file: PathBuf,
|
|
|
/// The data file path, relative to the vfs root (same as `index_file`
|
|
|
/// if inline)
|
|
|
data_file: PathBuf,
|
|
|
/// Data config that applies to this revlog
|
|
|
data_config: RevlogDataConfig,
|
|
|
/// Delta config that applies to this revlog
|
|
|
delta_config: RevlogDeltaConfig,
|
|
|
/// Feature config that applies to this revlog
|
|
|
feature_config: RevlogFeatureConfig,
|
|
|
/// A view into this revlog's data file
|
|
|
segment_file: RandomAccessFile,
|
|
|
/// A cache of uncompressed chunks that have previously been restored.
|
|
|
/// Its eviction policy is defined in [`Self::new`].
|
|
|
uncompressed_chunk_cache: Option<UncompressedChunkCache>,
|
|
|
/// Used to keep track of the actual target during diverted writes
|
|
|
/// for the changelog
|
|
|
original_index_file: Option<PathBuf>,
|
|
|
/// Write handles to the index and data files
|
|
|
/// XXX why duplicate from `index` and `segment_file`?
|
|
|
writing_handles: Option<WriteHandles>,
|
|
|
/// See [`DelayedBuffer`].
|
|
|
delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
|
|
|
/// Whether this revlog is inline. XXX why duplicate from `index`?
|
|
|
pub inline: bool,
|
|
|
/// A cache of the last revision, which is usually accessed multiple
|
|
|
/// times.
|
|
|
pub last_revision_cache: Mutex<Option<SingleRevisionCache>>,
|
|
|
}
|
|
|
|
|
|
impl InnerRevlog {
|
|
|
pub fn new(
|
|
|
vfs: Box<dyn Vfs>,
|
|
|
index: Index,
|
|
|
index_file: PathBuf,
|
|
|
data_file: PathBuf,
|
|
|
data_config: RevlogDataConfig,
|
|
|
delta_config: RevlogDeltaConfig,
|
|
|
feature_config: RevlogFeatureConfig,
|
|
|
) -> Self {
|
|
|
assert!(index_file.is_relative());
|
|
|
assert!(data_file.is_relative());
|
|
|
let segment_file = RandomAccessFile::new(
|
|
|
dyn_clone::clone_box(&*vfs),
|
|
|
if index.is_inline() {
|
|
|
index_file.to_owned()
|
|
|
} else {
|
|
|
data_file.to_owned()
|
|
|
},
|
|
|
);
|
|
|
|
|
|
let uncompressed_chunk_cache =
|
|
|
data_config.uncompressed_cache_factor.map(
|
|
|
// Arbitrary initial value
|
|
|
// TODO check if using a hasher specific to integers is useful
|
|
|
|_factor| RefCell::new(LruMap::with_memory_budget(65536)),
|
|
|
);
|
|
|
|
|
|
let inline = index.is_inline();
|
|
|
Self {
|
|
|
index,
|
|
|
vfs,
|
|
|
index_file,
|
|
|
data_file,
|
|
|
data_config,
|
|
|
delta_config,
|
|
|
feature_config,
|
|
|
segment_file,
|
|
|
uncompressed_chunk_cache,
|
|
|
original_index_file: None,
|
|
|
writing_handles: None,
|
|
|
delayed_buffer: None,
|
|
|
inline,
|
|
|
last_revision_cache: Mutex::new(None),
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/// Return number of entries of the revlog index
|
|
|
pub fn len(&self) -> usize {
|
|
|
self.index.len()
|
|
|
}
|
|
|
|
|
|
/// Return `true` if this revlog has no entries
|
|
|
pub fn is_empty(&self) -> bool {
|
|
|
self.len() == 0
|
|
|
}
|
|
|
|
|
|
/// Return whether this revlog is inline (mixed index and data)
|
|
|
pub fn is_inline(&self) -> bool {
|
|
|
self.inline
|
|
|
}
|
|
|
|
|
|
/// Clear all caches from this revlog
|
|
|
pub fn clear_cache(&mut self) {
|
|
|
assert!(!self.is_delaying());
|
|
|
if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
|
|
|
// We don't clear the allocation here because it's probably faster.
|
|
|
// We could change our minds later if this ends up being a problem
|
|
|
// with regards to memory consumption.
|
|
|
cache.borrow_mut().clear();
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/// Return an entry for the null revision
|
|
|
pub fn make_null_entry(&self) -> RevlogEntry {
|
|
|
RevlogEntry {
|
|
|
revlog: self,
|
|
|
rev: NULL_REVISION,
|
|
|
uncompressed_len: 0,
|
|
|
p1: NULL_REVISION,
|
|
|
p2: NULL_REVISION,
|
|
|
flags: NULL_REVLOG_ENTRY_FLAGS,
|
|
|
hash: NULL_NODE,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist
|
|
|
pub fn get_entry_for_checked_rev(
|
|
|
&self,
|
|
|
rev: Revision,
|
|
|
) -> Result<RevlogEntry, RevlogError> {
|
|
|
if rev == NULL_REVISION {
|
|
|
return Ok(self.make_null_entry());
|
|
|
}
|
|
|
let index_entry = self
|
|
|
.index
|
|
|
.get_entry(rev)
|
|
|
.ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?;
|
|
|
let p1 =
|
|
|
self.index.check_revision(index_entry.p1()).ok_or_else(|| {
|
|
|
RevlogError::corrupted(format!(
|
|
|
"p1 for rev {} is invalid",
|
|
|
rev
|
|
|
))
|
|
|
})?;
|
|
|
let p2 =
|
|
|
self.index.check_revision(index_entry.p2()).ok_or_else(|| {
|
|
|
RevlogError::corrupted(format!(
|
|
|
"p2 for rev {} is invalid",
|
|
|
rev
|
|
|
))
|
|
|
})?;
|
|
|
let entry = RevlogEntry {
|
|
|
revlog: self,
|
|
|
rev,
|
|
|
uncompressed_len: index_entry.uncompressed_len(),
|
|
|
p1,
|
|
|
p2,
|
|
|
flags: index_entry.flags(),
|
|
|
hash: *index_entry.hash(),
|
|
|
};
|
|
|
Ok(entry)
|
|
|
}
|
|
|
|
|
|
/// Return the [`RevlogEntry`] for `rev`. If `rev` fails to check, this
|
|
|
/// returns a [`RevlogError`].
|
|
|
/// TODO normalize naming across the index and all revlogs
|
|
|
/// (changelog, etc.) so that `get_entry` is always on an unchecked rev and
|
|
|
/// `get_entry_for_checked_rev` is for checked rev
|
|
|
pub fn get_entry(
|
|
|
&self,
|
|
|
rev: UncheckedRevision,
|
|
|
) -> Result<RevlogEntry, RevlogError> {
|
|
|
if rev == NULL_REVISION.into() {
|
|
|
return Ok(self.make_null_entry());
|
|
|
}
|
|
|
let rev = self.index.check_revision(rev).ok_or_else(|| {
|
|
|
RevlogError::corrupted(format!("rev {} is invalid", rev))
|
|
|
})?;
|
|
|
self.get_entry_for_checked_rev(rev)
|
|
|
}
|
|
|
|
|
|
/// Is the revlog currently delaying the visibility of written data?
|
|
|
///
|
|
|
/// The delaying mechanism can be either in-memory or written on disk in a
|
|
|
/// side-file.
|
|
|
pub fn is_delaying(&self) -> bool {
|
|
|
self.delayed_buffer.is_some() || self.original_index_file.is_some()
|
|
|
}
|
|
|
|
|
|
/// The offset of the data chunk for this revision
|
|
|
#[inline(always)]
|
|
|
pub fn start(&self, rev: Revision) -> usize {
|
|
|
self.index.start(
|
|
|
rev,
|
|
|
&self
|
|
|
.index
|
|
|
.get_entry(rev)
|
|
|
.unwrap_or_else(|| self.index.make_null_entry()),
|
|
|
)
|
|
|
}
|
|
|
|
|
|
/// The length of the data chunk for this revision
|
|
|
/// TODO rename this method and others to more explicit names than the
|
|
|
/// existing ones that were copied over from Python
|
|
|
#[inline(always)]
|
|
|
pub fn length(&self, rev: Revision) -> usize {
|
|
|
self.index
|
|
|
.get_entry(rev)
|
|
|
.unwrap_or_else(|| self.index.make_null_entry())
|
|
|
.compressed_len() as usize
|
|
|
}
|
|
|
|
|
|
/// The end of the data chunk for this revision
|
|
|
#[inline(always)]
|
|
|
pub fn end(&self, rev: Revision) -> usize {
|
|
|
self.start(rev) + self.length(rev)
|
|
|
}
|
|
|
|
|
|
/// Return the delta parent of the given revision
|
|
|
pub fn delta_parent(&self, rev: Revision) -> Revision {
|
|
|
let base = self
|
|
|
.index
|
|
|
.get_entry(rev)
|
|
|
.unwrap()
|
|
|
.base_revision_or_base_of_delta_chain();
|
|
|
if base.0 == rev.0 {
|
|
|
NULL_REVISION
|
|
|
} else if self.delta_config.general_delta {
|
|
|
Revision(base.0)
|
|
|
} else {
|
|
|
Revision(rev.0 - 1)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/// Return whether `rev` points to a snapshot revision (i.e. does not have
|
|
|
/// a delta base).
|
|
|
pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> {
|
|
|
if !self.delta_config.sparse_revlog {
|
|
|
return Ok(self.delta_parent(rev) == NULL_REVISION);
|
|
|
}
|
|
|
self.index.is_snapshot_unchecked(rev)
|
|
|
}
|
|
|
|
|
|
/// Return the delta chain for `rev` according to this revlog's config.
|
|
|
/// See [`Index::delta_chain`] for more information.
|
|
|
pub fn delta_chain(
|
|
|
&self,
|
|
|
rev: Revision,
|
|
|
stop_rev: Option<Revision>,
|
|
|
) -> Result<(Vec<Revision>, bool), HgError> {
|
|
|
self.index.delta_chain(
|
|
|
rev,
|
|
|
stop_rev,
|
|
|
self.delta_config.general_delta.into(),
|
|
|
)
|
|
|
}
|
|
|
|
|
|
fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> {
|
|
|
// TODO cache the compressor?
|
|
|
Ok(match self.feature_config.compression_engine {
|
|
|
CompressionConfig::Zlib { level } => {
|
|
|
Box::new(ZlibCompressor::new(level))
|
|
|
}
|
|
|
CompressionConfig::Zstd { level, threads } => {
|
|
|
Box::new(ZstdCompressor::new(level, threads))
|
|
|
}
|
|
|
CompressionConfig::None => Box::new(NoneCompressor),
|
|
|
})
|
|
|
}
|
|
|
|
|
|
/// Generate a possibly-compressed representation of data.
|
|
|
/// Returns `None` if the data was not compressed.
|
|
|
pub fn compress<'data>(
|
|
|
&self,
|
|
|
data: &'data [u8],
|
|
|
) -> Result<Option<Cow<'data, [u8]>>, RevlogError> {
|
|
|
if data.is_empty() {
|
|
|
return Ok(Some(data.into()));
|
|
|
}
|
|
|
let res = self.compressor()?.compress(data)?;
|
|
|
if let Some(compressed) = res {
|
|
|
// The revlog compressor added the header in the returned data.
|
|
|
return Ok(Some(compressed.into()));
|
|
|
}
|
|
|
|
|
|
if data[0] == b'\0' {
|
|
|
return Ok(Some(data.into()));
|
|
|
}
|
|
|
Ok(None)
|
|
|
}
|
|
|
|
|
|
/// Decompress a revlog chunk.
|
|
|
///
|
|
|
/// The chunk is expected to begin with a header identifying the
|
|
|
/// format type so it can be routed to an appropriate decompressor.
|
|
|
pub fn decompress<'a>(
|
|
|
&'a self,
|
|
|
data: &'a [u8],
|
|
|
) -> Result<Cow<[u8]>, RevlogError> {
|
|
|
if data.is_empty() {
|
|
|
return Ok(data.into());
|
|
|
}
|
|
|
|
|
|
// Revlogs are read much more frequently than they are written and many
|
|
|
// chunks only take microseconds to decompress, so performance is
|
|
|
// important here.
|
|
|
|
|
|
let header = data[0];
|
|
|
match header {
|
|
|
// Settings don't matter as they only affect compression
|
|
|
ZLIB_BYTE => Ok(ZlibCompressor::new(0).decompress(data)?.into()),
|
|
|
// Settings don't matter as they only affect compression
|
|
|
ZSTD_BYTE => {
|
|
|
Ok(ZstdCompressor::new(0, 0).decompress(data)?.into())
|
|
|
}
|
|
|
b'\0' => Ok(data.into()),
|
|
|
b'u' => Ok((&data[1..]).into()),
|
|
|
other => Err(HgError::UnsupportedFeature(format!(
|
|
|
"unknown compression header '{}'",
|
|
|
other
|
|
|
))
|
|
|
.into()),
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/// Obtain a segment of raw data corresponding to a range of revisions.
|
|
|
///
|
|
|
/// Requests for data may be satisfied by a cache.
|
|
|
///
|
|
|
/// Returns a 2-tuple of (offset, data) for the requested range of
|
|
|
/// revisions. Offset is the integer offset from the beginning of the
|
|
|
/// revlog and data is a slice of the raw byte data.
|
|
|
///
|
|
|
/// Callers will need to call `self.start(rev)` and `self.length(rev)`
|
|
|
/// to determine where each revision's data begins and ends.
|
|
|
pub fn get_segment_for_revs(
|
|
|
&self,
|
|
|
start_rev: Revision,
|
|
|
end_rev: Revision,
|
|
|
) -> Result<(usize, Vec<u8>), HgError> {
|
|
|
let start = if start_rev == NULL_REVISION {
|
|
|
0
|
|
|
} else {
|
|
|
let start_entry = self
|
|
|
.index
|
|
|
.get_entry(start_rev)
|
|
|
.expect("null revision segment");
|
|
|
self.index.start(start_rev, &start_entry)
|
|
|
};
|
|
|
let end_entry = self
|
|
|
.index
|
|
|
.get_entry(end_rev)
|
|
|
.expect("null revision segment");
|
|
|
let end = self.index.start(end_rev, &end_entry) + self.length(end_rev);
|
|
|
|
|
|
let length = end - start;
|
|
|
|
|
|
// XXX should we use mmap instead of doing this for platforms that
|
|
|
// support madvise/populate?
|
|
|
Ok((start, self.segment_file.read_chunk(start, length)?))
|
|
|
}
|
|
|
|
|
|
/// Return the uncompressed raw data for `rev`
|
|
|
pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> {
|
|
|
if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
|
|
|
if let Some(chunk) = cache.borrow_mut().get(&rev) {
|
|
|
return Ok(chunk.clone());
|
|
|
}
|
|
|
}
|
|
|
// TODO revlogv2 should check the compression mode
|
|
|
let data = self.get_segment_for_revs(rev, rev)?.1;
|
|
|
let uncompressed = self.decompress(&data).map_err(|e| {
|
|
|
HgError::abort(
|
|
|
format!("revlog decompression error: {}", e),
|
|
|
exit_codes::ABORT,
|
|
|
None,
|
|
|
)
|
|
|
})?;
|
|
|
let uncompressed: Arc<[u8]> = Arc::from(uncompressed.into_owned());
|
|
|
if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
|
|
|
cache.borrow_mut().insert(rev, uncompressed.clone());
|
|
|
}
|
|
|
Ok(uncompressed)
|
|
|
}
|
|
|
|
|
|
/// Execute `func` within a read context for the data file, meaning that
|
|
|
/// the read handle will be taken and discarded after the operation.
|
|
|
pub fn with_read<R>(
|
|
|
&self,
|
|
|
func: impl FnOnce() -> Result<R, RevlogError>,
|
|
|
) -> Result<R, RevlogError> {
|
|
|
self.enter_reading_context()?;
|
|
|
let res = func();
|
|
|
self.exit_reading_context();
|
|
|
res.map_err(Into::into)
|
|
|
}
|
|
|
|
|
|
/// `pub` only for use in hg-cpython
|
|
|
#[doc(hidden)]
|
|
|
pub fn enter_reading_context(&self) -> Result<(), HgError> {
|
|
|
if self.is_empty() {
|
|
|
// Nothing to be read
|
|
|
return Ok(());
|
|
|
}
|
|
|
if self.delayed_buffer.is_some() && self.is_inline() {
|
|
|
return Err(HgError::abort(
|
|
|
"revlog with delayed write should not be inline",
|
|
|
exit_codes::ABORT,
|
|
|
None,
|
|
|
));
|
|
|
}
|
|
|
self.segment_file.get_read_handle()?;
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
/// `pub` only for use in hg-cpython
|
|
|
#[doc(hidden)]
|
|
|
pub fn exit_reading_context(&self) {
|
|
|
self.segment_file.exit_reading_context()
|
|
|
}
|
|
|
|
|
|
/// Fill the buffer returned by `get_buffer` with the possibly un-validated
|
|
|
/// raw text for a revision. It can be already validated if it comes
|
|
|
/// from the cache.
|
|
|
pub fn raw_text<G, T>(
|
|
|
&self,
|
|
|
rev: Revision,
|
|
|
get_buffer: G,
|
|
|
) -> Result<(), RevlogError>
|
|
|
where
|
|
|
G: FnOnce(
|
|
|
usize,
|
|
|
&mut dyn FnMut(
|
|
|
&mut dyn RevisionBuffer<Target = T>,
|
|
|
) -> Result<(), RevlogError>,
|
|
|
) -> Result<(), RevlogError>,
|
|
|
{
|
|
|
let entry = &self.get_entry_for_checked_rev(rev)?;
|
|
|
let raw_size = entry.uncompressed_len();
|
|
|
let mut mutex_guard = self
|
|
|
.last_revision_cache
|
|
|
.lock()
|
|
|
.expect("lock should not be held");
|
|
|
let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard {
|
|
|
Some((*rev, data.deref().as_ref()))
|
|
|
} else {
|
|
|
None
|
|
|
};
|
|
|
if let Some(cache) = &self.uncompressed_chunk_cache {
|
|
|
let cache = &mut cache.borrow_mut();
|
|
|
if let Some(size) = raw_size {
|
|
|
// Dynamically update the uncompressed_chunk_cache size to the
|
|
|
// largest revision we've seen in this revlog.
|
|
|
// Do it *before* restoration in case the current revision
|
|
|
// is the largest.
|
|
|
let factor = self
|
|
|
.data_config
|
|
|
.uncompressed_cache_factor
|
|
|
.expect("cache should not exist without factor");
|
|
|
let candidate_size = (size as f64 * factor) as usize;
|
|
|
let limiter_mut = cache.limiter_mut();
|
|
|
if candidate_size > limiter_mut.max_memory_usage() {
|
|
|
std::mem::swap(
|
|
|
limiter_mut,
|
|
|
&mut ByMemoryUsage::new(candidate_size),
|
|
|
);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
entry.rawdata(cached_rev, get_buffer)?;
|
|
|
// drop cache to save memory, the caller is expected to update
|
|
|
// the revision cache after validating the text
|
|
|
mutex_guard.take();
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
/// Only `pub` for `hg-cpython`.
|
|
|
/// Obtain decompressed raw data for the specified revisions that are
|
|
|
/// assumed to be in ascending order.
|
|
|
///
|
|
|
/// Returns a list with decompressed data for each requested revision.
|
|
|
#[doc(hidden)]
|
|
|
pub fn chunks(
|
|
|
&self,
|
|
|
revs: Vec<Revision>,
|
|
|
target_size: Option<u64>,
|
|
|
) -> Result<Vec<Arc<[u8]>>, RevlogError> {
|
|
|
if revs.is_empty() {
|
|
|
return Ok(vec![]);
|
|
|
}
|
|
|
let mut fetched_revs = vec![];
|
|
|
let mut chunks = Vec::with_capacity(revs.len());
|
|
|
|
|
|
match self.uncompressed_chunk_cache.as_ref() {
|
|
|
Some(cache) => {
|
|
|
let mut cache = cache.borrow_mut();
|
|
|
for rev in revs.iter() {
|
|
|
match cache.get(rev) {
|
|
|
Some(hit) => chunks.push((*rev, hit.to_owned())),
|
|
|
None => fetched_revs.push(*rev),
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
None => fetched_revs = revs,
|
|
|
}
|
|
|
|
|
|
let already_cached = chunks.len();
|
|
|
|
|
|
let sliced_chunks = if fetched_revs.is_empty() {
|
|
|
vec![]
|
|
|
} else if !self.data_config.with_sparse_read || self.is_inline() {
|
|
|
vec![fetched_revs]
|
|
|
} else {
|
|
|
self.slice_chunk(&fetched_revs, target_size)?
|
|
|
};
|
|
|
|
|
|
self.with_read(|| {
|
|
|
for revs_chunk in sliced_chunks {
|
|
|
let first_rev = revs_chunk[0];
|
|
|
// Skip trailing revisions with empty diff
|
|
|
let last_rev_idx = revs_chunk
|
|
|
.iter()
|
|
|
.rposition(|r| self.length(*r) != 0)
|
|
|
.unwrap_or(revs_chunk.len() - 1);
|
|
|
|
|
|
let last_rev = revs_chunk[last_rev_idx];
|
|
|
|
|
|
let (offset, data) =
|
|
|
self.get_segment_for_revs(first_rev, last_rev)?;
|
|
|
|
|
|
let revs_chunk = &revs_chunk[..=last_rev_idx];
|
|
|
|
|
|
for rev in revs_chunk {
|
|
|
let chunk_start = self.start(*rev);
|
|
|
let chunk_length = self.length(*rev);
|
|
|
// TODO revlogv2 should check the compression mode
|
|
|
let bytes = &data[chunk_start - offset..][..chunk_length];
|
|
|
let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE {
|
|
|
// If we're using `zstd`, we want to try a more
|
|
|
// specialized decompression
|
|
|
let entry = self.index.get_entry(*rev).unwrap();
|
|
|
let is_delta = entry
|
|
|
.base_revision_or_base_of_delta_chain()
|
|
|
!= (*rev).into();
|
|
|
let uncompressed = uncompressed_zstd_data(
|
|
|
bytes,
|
|
|
is_delta,
|
|
|
entry.uncompressed_len(),
|
|
|
)?;
|
|
|
Cow::Owned(uncompressed)
|
|
|
} else {
|
|
|
// Otherwise just fallback to generic decompression.
|
|
|
self.decompress(bytes)?
|
|
|
};
|
|
|
|
|
|
chunks.push((*rev, chunk.into()));
|
|
|
}
|
|
|
}
|
|
|
Ok(())
|
|
|
})?;
|
|
|
|
|
|
if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
|
|
|
let mut cache = cache.borrow_mut();
|
|
|
for (rev, chunk) in chunks.iter().skip(already_cached) {
|
|
|
cache.insert(*rev, chunk.clone());
|
|
|
}
|
|
|
}
|
|
|
// Use stable sort here since it's *mostly* sorted
|
|
|
chunks.sort_by(|a, b| a.0.cmp(&b.0));
|
|
|
Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect())
|
|
|
}
|
|
|
|
|
|
/// Slice revs to reduce the amount of unrelated data to be read from disk.
|
|
|
///
|
|
|
/// ``revs`` is sliced into groups that should be read in one time.
|
|
|
/// Assume that revs are sorted.
|
|
|
///
|
|
|
/// The initial chunk is sliced until the overall density
|
|
|
/// (payload/chunks-span ratio) is above
|
|
|
/// `revlog.data_config.sr_density_threshold`.
|
|
|
/// No gap smaller than `revlog.data_config.sr_min_gap_size` is skipped.
|
|
|
///
|
|
|
/// If `target_size` is set, no chunk larger than `target_size`
|
|
|
/// will be returned.
|
|
|
/// For consistency with other slicing choices, this limit won't go lower
|
|
|
/// than `revlog.data_config.sr_min_gap_size`.
|
|
|
///
|
|
|
/// If individual revision chunks are larger than this limit, they will
|
|
|
/// still be raised individually.
|
|
|
pub fn slice_chunk(
|
|
|
&self,
|
|
|
revs: &[Revision],
|
|
|
target_size: Option<u64>,
|
|
|
) -> Result<Vec<Vec<Revision>>, RevlogError> {
|
|
|
let target_size =
|
|
|
target_size.map(|size| size.max(self.data_config.sr_min_gap_size));
|
|
|
|
|
|
let target_density = self.data_config.sr_density_threshold;
|
|
|
let min_gap_size = self.data_config.sr_min_gap_size as usize;
|
|
|
let to_density = self.index.slice_chunk_to_density(
|
|
|
revs,
|
|
|
target_density,
|
|
|
min_gap_size,
|
|
|
);
|
|
|
|
|
|
let mut sliced = vec![];
|
|
|
|
|
|
for chunk in to_density {
|
|
|
sliced.extend(
|
|
|
self.slice_chunk_to_size(&chunk, target_size)?
|
|
|
.into_iter()
|
|
|
.map(ToOwned::to_owned),
|
|
|
);
|
|
|
}
|
|
|
|
|
|
Ok(sliced)
|
|
|
}
|
|
|
|
|
|
/// Slice revs to match the target size
|
|
|
///
|
|
|
/// This is intended to be used on chunks that density slicing selected,
|
|
|
/// but that are still too large compared to the read guarantee of revlogs.
|
|
|
/// This might happen when the "minimal gap size" interrupted the slicing
|
|
|
/// or when chains are built in a way that create large blocks next to
|
|
|
/// each other.
|
|
|
fn slice_chunk_to_size<'a>(
|
|
|
&self,
|
|
|
revs: &'a [Revision],
|
|
|
target_size: Option<u64>,
|
|
|
) -> Result<Vec<&'a [Revision]>, RevlogError> {
|
|
|
let mut start_data = self.start(revs[0]);
|
|
|
let end_data = self.end(revs[revs.len() - 1]);
|
|
|
let full_span = end_data - start_data;
|
|
|
|
|
|
let nothing_to_do = target_size
|
|
|
.map(|size| full_span <= size as usize)
|
|
|
.unwrap_or(true);
|
|
|
|
|
|
if nothing_to_do {
|
|
|
return Ok(vec![revs]);
|
|
|
}
|
|
|
let target_size = target_size.expect("target_size is set") as usize;
|
|
|
|
|
|
let mut start_rev_idx = 0;
|
|
|
let mut end_rev_idx = 1;
|
|
|
let mut chunks = vec![];
|
|
|
|
|
|
for (idx, rev) in revs.iter().enumerate().skip(1) {
|
|
|
let span = self.end(*rev) - start_data;
|
|
|
let is_snapshot = self.is_snapshot(*rev)?;
|
|
|
if span <= target_size && is_snapshot {
|
|
|
end_rev_idx = idx + 1;
|
|
|
} else {
|
|
|
let chunk =
|
|
|
self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
|
|
|
if !chunk.is_empty() {
|
|
|
chunks.push(chunk);
|
|
|
}
|
|
|
start_rev_idx = idx;
|
|
|
start_data = self.start(*rev);
|
|
|
end_rev_idx = idx + 1;
|
|
|
}
|
|
|
if !is_snapshot {
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// For the others, we use binary slicing to quickly converge towards
|
|
|
// valid chunks (otherwise, we might end up looking for the start/end
|
|
|
// of many revisions). This logic is not looking for the perfect
|
|
|
// slicing point, it quickly converges towards valid chunks.
|
|
|
let number_of_items = revs.len();
|
|
|
|
|
|
while (end_data - start_data) > target_size {
|
|
|
end_rev_idx = number_of_items;
|
|
|
if number_of_items - start_rev_idx <= 1 {
|
|
|
// Protect against individual chunks larger than the limit
|
|
|
break;
|
|
|
}
|
|
|
let mut local_end_data = self.end(revs[end_rev_idx - 1]);
|
|
|
let mut span = local_end_data - start_data;
|
|
|
while span > target_size {
|
|
|
if end_rev_idx - start_rev_idx <= 1 {
|
|
|
// Protect against individual chunks larger than the limit
|
|
|
break;
|
|
|
}
|
|
|
end_rev_idx -= (end_rev_idx - start_rev_idx) / 2;
|
|
|
local_end_data = self.end(revs[end_rev_idx - 1]);
|
|
|
span = local_end_data - start_data;
|
|
|
}
|
|
|
let chunk =
|
|
|
self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
|
|
|
if !chunk.is_empty() {
|
|
|
chunks.push(chunk);
|
|
|
}
|
|
|
start_rev_idx = end_rev_idx;
|
|
|
start_data = self.start(revs[start_rev_idx]);
|
|
|
}
|
|
|
|
|
|
let chunk = self.trim_chunk(revs, start_rev_idx, None);
|
|
|
if !chunk.is_empty() {
|
|
|
chunks.push(chunk);
|
|
|
}
|
|
|
|
|
|
Ok(chunks)
|
|
|
}
|
|
|
|
|
|
/// Returns `revs[startidx..endidx]` without empty trailing revs
|
|
|
fn trim_chunk<'a>(
|
|
|
&self,
|
|
|
revs: &'a [Revision],
|
|
|
start_rev_idx: usize,
|
|
|
end_rev_idx: Option<usize>,
|
|
|
) -> &'a [Revision] {
|
|
|
let mut end_rev_idx = end_rev_idx.unwrap_or(revs.len());
|
|
|
|
|
|
// If we have a non-empty delta candidate, there is nothing to trim
|
|
|
if revs[end_rev_idx - 1].0 < self.len() as BaseRevision {
|
|
|
// Trim empty revs at the end, except the very first rev of a chain
|
|
|
while end_rev_idx > 1
|
|
|
&& end_rev_idx > start_rev_idx
|
|
|
&& self.length(revs[end_rev_idx - 1]) == 0
|
|
|
{
|
|
|
end_rev_idx -= 1
|
|
|
}
|
|
|
}
|
|
|
|
|
|
&revs[start_rev_idx..end_rev_idx]
|
|
|
}
|
|
|
|
|
|
/// Check the hash of some given data against the recorded hash.
|
|
|
pub fn check_hash(
|
|
|
&self,
|
|
|
p1: Revision,
|
|
|
p2: Revision,
|
|
|
expected: &[u8],
|
|
|
data: &[u8],
|
|
|
) -> bool {
|
|
|
let e1 = self.index.get_entry(p1);
|
|
|
let h1 = match e1 {
|
|
|
Some(ref entry) => entry.hash(),
|
|
|
None => &NULL_NODE,
|
|
|
};
|
|
|
let e2 = self.index.get_entry(p2);
|
|
|
let h2 = match e2 {
|
|
|
Some(ref entry) => entry.hash(),
|
|
|
None => &NULL_NODE,
|
|
|
};
|
|
|
|
|
|
hash(data, h1.as_bytes(), h2.as_bytes()) == expected
|
|
|
}
|
|
|
|
|
|
/// Returns whether we are currently in a [`Self::with_write`] context
|
|
|
pub fn is_writing(&self) -> bool {
|
|
|
self.writing_handles.is_some()
|
|
|
}
|
|
|
|
|
|
/// Open the revlog files for writing
|
|
|
///
|
|
|
/// Adding content to a revlog should be done within this context.
|
|
|
/// TODO try using `BufRead` and `BufWrite` and see if performance improves
|
|
|
pub fn with_write<R>(
|
|
|
&mut self,
|
|
|
transaction: &mut impl Transaction,
|
|
|
data_end: Option<usize>,
|
|
|
func: impl FnOnce() -> R,
|
|
|
) -> Result<R, HgError> {
|
|
|
if self.is_writing() {
|
|
|
return Ok(func());
|
|
|
}
|
|
|
self.enter_writing_context(data_end, transaction)
|
|
|
.map_err(|e| {
|
|
|
self.exit_writing_context();
|
|
|
e
|
|
|
})?;
|
|
|
let res = func();
|
|
|
self.exit_writing_context();
|
|
|
Ok(res)
|
|
|
}
|
|
|
|
|
|
/// `pub` only for use in hg-cpython
|
|
|
#[doc(hidden)]
|
|
|
pub fn exit_writing_context(&mut self) {
|
|
|
self.writing_handles.take();
|
|
|
self.segment_file.writing_handle.take();
|
|
|
self.segment_file.reading_handle.take();
|
|
|
}
|
|
|
|
|
|
/// `pub` only for use in hg-cpython
|
|
|
#[doc(hidden)]
|
|
|
pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
|
|
|
self.writing_handles.as_ref()
|
|
|
}
|
|
|
|
|
|
/// `pub` only for use in hg-cpython
|
|
|
#[doc(hidden)]
|
|
|
pub fn enter_writing_context(
|
|
|
&mut self,
|
|
|
data_end: Option<usize>,
|
|
|
transaction: &mut impl Transaction,
|
|
|
) -> Result<(), HgError> {
|
|
|
let data_size = if self.is_empty() {
|
|
|
0
|
|
|
} else {
|
|
|
self.end(Revision((self.len() - 1) as BaseRevision))
|
|
|
};
|
|
|
let data_handle = if !self.is_inline() {
|
|
|
let data_handle = match self.vfs.open(&self.data_file) {
|
|
|
Ok(mut f) => {
|
|
|
if let Some(end) = data_end {
|
|
|
f.seek(SeekFrom::Start(end as u64))
|
|
|
.when_reading_file(&self.data_file)?;
|
|
|
} else {
|
|
|
f.seek(SeekFrom::End(0))
|
|
|
.when_reading_file(&self.data_file)?;
|
|
|
}
|
|
|
f
|
|
|
}
|
|
|
Err(e) => match e {
|
|
|
HgError::IoError { error, context } => {
|
|
|
if error.kind() != ErrorKind::NotFound {
|
|
|
return Err(HgError::IoError { error, context });
|
|
|
}
|
|
|
self.vfs.create(&self.data_file)?
|
|
|
}
|
|
|
e => return Err(e),
|
|
|
},
|
|
|
};
|
|
|
transaction.add(&self.data_file, data_size);
|
|
|
Some(FileHandle::from_file(
|
|
|
data_handle,
|
|
|
dyn_clone::clone_box(&*self.vfs),
|
|
|
&self.data_file,
|
|
|
))
|
|
|
} else {
|
|
|
None
|
|
|
};
|
|
|
let index_size = self.len() * INDEX_ENTRY_SIZE;
|
|
|
let index_handle = self.index_write_handle()?;
|
|
|
if self.is_inline() {
|
|
|
transaction.add(&self.index_file, data_size);
|
|
|
} else {
|
|
|
transaction.add(&self.index_file, index_size);
|
|
|
}
|
|
|
self.writing_handles = Some(WriteHandles {
|
|
|
index_handle: index_handle.clone(),
|
|
|
data_handle: data_handle.clone(),
|
|
|
});
|
|
|
*self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
|
|
|
Some(index_handle)
|
|
|
} else {
|
|
|
data_handle
|
|
|
};
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
/// Get a write handle to the index, sought to the end of its data.
|
|
|
fn index_write_handle(&self) -> Result<FileHandle, HgError> {
|
|
|
let res = if self.delayed_buffer.is_none() {
|
|
|
if self.data_config.check_ambig {
|
|
|
self.vfs.open_check_ambig(&self.index_file)
|
|
|
} else {
|
|
|
self.vfs.open(&self.index_file)
|
|
|
}
|
|
|
} else {
|
|
|
self.vfs.open(&self.index_file)
|
|
|
};
|
|
|
match res {
|
|
|
Ok(mut handle) => {
|
|
|
handle
|
|
|
.seek(SeekFrom::End(0))
|
|
|
.when_reading_file(&self.index_file)?;
|
|
|
Ok(
|
|
|
if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
|
|
|
{
|
|
|
FileHandle::from_file_delayed(
|
|
|
handle,
|
|
|
dyn_clone::clone_box(&*self.vfs),
|
|
|
&self.index_file,
|
|
|
delayed_buffer.clone(),
|
|
|
)?
|
|
|
} else {
|
|
|
FileHandle::from_file(
|
|
|
handle,
|
|
|
dyn_clone::clone_box(&*self.vfs),
|
|
|
&self.index_file,
|
|
|
)
|
|
|
},
|
|
|
)
|
|
|
}
|
|
|
Err(e) => match e {
|
|
|
HgError::IoError { error, context } => {
|
|
|
if error.kind() != ErrorKind::NotFound {
|
|
|
return Err(HgError::IoError { error, context });
|
|
|
};
|
|
|
if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
|
|
|
{
|
|
|
FileHandle::new_delayed(
|
|
|
dyn_clone::clone_box(&*self.vfs),
|
|
|
&self.index_file,
|
|
|
true,
|
|
|
delayed_buffer.clone(),
|
|
|
)
|
|
|
} else {
|
|
|
FileHandle::new(
|
|
|
dyn_clone::clone_box(&*self.vfs),
|
|
|
&self.index_file,
|
|
|
true,
|
|
|
true,
|
|
|
)
|
|
|
}
|
|
|
}
|
|
|
e => Err(e),
|
|
|
},
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/// Split the data of an inline revlog into an index and a data file
|
|
|
pub fn split_inline(
|
|
|
&mut self,
|
|
|
header: IndexHeader,
|
|
|
new_index_file_path: Option<PathBuf>,
|
|
|
) -> Result<PathBuf, RevlogError> {
|
|
|
assert!(self.delayed_buffer.is_none());
|
|
|
let existing_handles = self.writing_handles.is_some();
|
|
|
if let Some(handles) = &mut self.writing_handles {
|
|
|
handles.index_handle.flush()?;
|
|
|
self.writing_handles.take();
|
|
|
self.segment_file.writing_handle.take();
|
|
|
}
|
|
|
let mut new_data_file_handle = self.vfs.create(&self.data_file)?;
|
|
|
// Drop any potential data, possibly redundant with the VFS impl.
|
|
|
new_data_file_handle
|
|
|
.set_len(0)
|
|
|
.when_writing_file(&self.data_file)?;
|
|
|
|
|
|
self.with_read(|| -> Result<(), RevlogError> {
|
|
|
for r in 0..self.index.len() {
|
|
|
let rev = Revision(r as BaseRevision);
|
|
|
let rev_segment = self.get_segment_for_revs(rev, rev)?.1;
|
|
|
new_data_file_handle
|
|
|
.write_all(&rev_segment)
|
|
|
.when_writing_file(&self.data_file)?;
|
|
|
}
|
|
|
new_data_file_handle
|
|
|
.flush()
|
|
|
.when_writing_file(&self.data_file)?;
|
|
|
Ok(())
|
|
|
})?;
|
|
|
|
|
|
if let Some(index_path) = new_index_file_path {
|
|
|
self.index_file = index_path
|
|
|
}
|
|
|
|
|
|
let mut new_index_handle = self.vfs.create(&self.index_file)?;
|
|
|
let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE);
|
|
|
for r in 0..self.len() {
|
|
|
let rev = Revision(r as BaseRevision);
|
|
|
let entry = self.index.entry_binary(rev).unwrap_or_else(|| {
|
|
|
panic!(
|
|
|
"entry {} should exist in {}",
|
|
|
r,
|
|
|
self.index_file.display()
|
|
|
)
|
|
|
});
|
|
|
if r == 0 {
|
|
|
new_data.extend(header.header_bytes);
|
|
|
}
|
|
|
new_data.extend(entry);
|
|
|
}
|
|
|
new_index_handle
|
|
|
.write_all(&new_data)
|
|
|
.when_writing_file(&self.index_file)?;
|
|
|
// Replace the index with a new one because the buffer contains inline
|
|
|
// data
|
|
|
self.index = Index::new(Box::new(new_data), header)?;
|
|
|
self.inline = false;
|
|
|
|
|
|
self.segment_file = RandomAccessFile::new(
|
|
|
dyn_clone::clone_box(&*self.vfs),
|
|
|
self.data_file.to_owned(),
|
|
|
);
|
|
|
if existing_handles {
|
|
|
// Switched from inline to conventional, reopen the index
|
|
|
let new_data_handle = Some(FileHandle::from_file(
|
|
|
new_data_file_handle,
|
|
|
dyn_clone::clone_box(&*self.vfs),
|
|
|
&self.data_file,
|
|
|
));
|
|
|
self.writing_handles = Some(WriteHandles {
|
|
|
index_handle: self.index_write_handle()?,
|
|
|
data_handle: new_data_handle.clone(),
|
|
|
});
|
|
|
*self.segment_file.writing_handle.borrow_mut() = new_data_handle;
|
|
|
}
|
|
|
|
|
|
Ok(self.index_file.to_owned())
|
|
|
}
|
|
|
|
|
|
/// Write a new entry to this revlog.
|
|
|
/// - `entry` is the index bytes
|
|
|
/// - `header_and_data` is the compression header and the revision data
|
|
|
/// - `offset` is the position in the data file to write to
|
|
|
/// - `index_end` is the overwritten position in the index in revlog-v2,
|
|
|
/// since the format may allow a rewrite of garbage data at the end.
|
|
|
/// - `data_end` is the overwritten position in the data-file in revlog-v2,
|
|
|
/// since the format may allow a rewrite of garbage data at the end.
|
|
|
///
|
|
|
/// XXX Why do we have `data_end` *and* `offset`? Same question in Python
|
|
|
pub fn write_entry(
|
|
|
&mut self,
|
|
|
mut transaction: impl Transaction,
|
|
|
entry: &[u8],
|
|
|
header_and_data: (&[u8], &[u8]),
|
|
|
mut offset: usize,
|
|
|
index_end: Option<u64>,
|
|
|
data_end: Option<u64>,
|
|
|
) -> Result<(u64, Option<u64>), HgError> {
|
|
|
let current_revision = self.len() - 1;
|
|
|
let canonical_index_file = self.canonical_index_file();
|
|
|
|
|
|
let is_inline = self.is_inline();
|
|
|
let handles = match &mut self.writing_handles {
|
|
|
None => {
|
|
|
return Err(HgError::abort(
|
|
|
"adding revision outside of the `with_write` context",
|
|
|
exit_codes::ABORT,
|
|
|
None,
|
|
|
));
|
|
|
}
|
|
|
Some(handles) => handles,
|
|
|
};
|
|
|
let index_handle = &mut handles.index_handle;
|
|
|
let data_handle = &mut handles.data_handle;
|
|
|
if let Some(end) = index_end {
|
|
|
index_handle
|
|
|
.seek(SeekFrom::Start(end))
|
|
|
.when_reading_file(&self.index_file)?;
|
|
|
} else {
|
|
|
index_handle
|
|
|
.seek(SeekFrom::End(0))
|
|
|
.when_reading_file(&self.index_file)?;
|
|
|
}
|
|
|
if let Some(data_handle) = data_handle {
|
|
|
if let Some(end) = data_end {
|
|
|
data_handle
|
|
|
.seek(SeekFrom::Start(end))
|
|
|
.when_reading_file(&self.data_file)?;
|
|
|
} else {
|
|
|
data_handle
|
|
|
.seek(SeekFrom::End(0))
|
|
|
.when_reading_file(&self.data_file)?;
|
|
|
}
|
|
|
}
|
|
|
let (header, data) = header_and_data;
|
|
|
|
|
|
if !is_inline {
|
|
|
transaction.add(&self.data_file, offset);
|
|
|
transaction
|
|
|
.add(&canonical_index_file, current_revision * entry.len());
|
|
|
let data_handle = data_handle
|
|
|
.as_mut()
|
|
|
.expect("data handle should exist when not inline");
|
|
|
if !header.is_empty() {
|
|
|
data_handle.write_all(header)?;
|
|
|
}
|
|
|
data_handle.write_all(data)?;
|
|
|
match &mut self.delayed_buffer {
|
|
|
Some(buf) => {
|
|
|
buf.lock()
|
|
|
.expect("propagate the panic")
|
|
|
.buffer
|
|
|
.write_all(entry)
|
|
|
.expect("write to delay buffer should succeed");
|
|
|
}
|
|
|
None => index_handle.write_all(entry)?,
|
|
|
}
|
|
|
} else if self.delayed_buffer.is_some() {
|
|
|
return Err(HgError::abort(
|
|
|
"invalid delayed write on inline revlog",
|
|
|
exit_codes::ABORT,
|
|
|
None,
|
|
|
));
|
|
|
} else {
|
|
|
offset += current_revision * entry.len();
|
|
|
transaction.add(&canonical_index_file, offset);
|
|
|
index_handle.write_all(entry)?;
|
|
|
index_handle.write_all(header)?;
|
|
|
index_handle.write_all(data)?;
|
|
|
}
|
|
|
let data_position = match data_handle {
|
|
|
Some(h) => Some(h.position()?),
|
|
|
None => None,
|
|
|
};
|
|
|
Ok((index_handle.position()?, data_position))
|
|
|
}
|
|
|
|
|
|
/// Return the real target index file and not the temporary when diverting
|
|
|
pub fn canonical_index_file(&self) -> PathBuf {
|
|
|
self.original_index_file
|
|
|
.as_ref()
|
|
|
.map(ToOwned::to_owned)
|
|
|
.unwrap_or_else(|| self.index_file.to_owned())
|
|
|
}
|
|
|
|
|
|
/// Return the path to the diverted index
|
|
|
fn diverted_index(&self) -> PathBuf {
|
|
|
self.index_file.with_extension("i.a")
|
|
|
}
|
|
|
|
|
|
/// True if we're in a [`Self::with_write`] or [`Self::with_read`] context
|
|
|
pub fn is_open(&self) -> bool {
|
|
|
self.segment_file.is_open()
|
|
|
}
|
|
|
|
|
|
/// Set this revlog to delay its writes to a buffer
|
|
|
pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> {
|
|
|
assert!(!self.is_open());
|
|
|
if self.is_inline() {
|
|
|
return Err(HgError::abort(
|
|
|
"revlog with delayed write should not be inline",
|
|
|
exit_codes::ABORT,
|
|
|
None,
|
|
|
));
|
|
|
}
|
|
|
if self.delayed_buffer.is_some() || self.original_index_file.is_some()
|
|
|
{
|
|
|
// Delay or divert already happening
|
|
|
return Ok(None);
|
|
|
}
|
|
|
if self.is_empty() {
|
|
|
self.original_index_file = Some(self.index_file.to_owned());
|
|
|
self.index_file = self.diverted_index();
|
|
|
if self.vfs.exists(&self.index_file) {
|
|
|
self.vfs.unlink(&self.index_file)?;
|
|
|
}
|
|
|
Ok(Some(self.index_file.to_owned()))
|
|
|
} else {
|
|
|
self.delayed_buffer =
|
|
|
Some(Arc::new(Mutex::new(DelayedBuffer::default())));
|
|
|
Ok(None)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/// Write the pending data (in memory) if any to the diverted index file
|
|
|
/// (on disk temporary file)
|
|
|
pub fn write_pending(
|
|
|
&mut self,
|
|
|
) -> Result<(Option<PathBuf>, bool), HgError> {
|
|
|
assert!(!self.is_open());
|
|
|
if self.is_inline() {
|
|
|
return Err(HgError::abort(
|
|
|
"revlog with delayed write should not be inline",
|
|
|
exit_codes::ABORT,
|
|
|
None,
|
|
|
));
|
|
|
}
|
|
|
if self.original_index_file.is_some() {
|
|
|
return Ok((None, true));
|
|
|
}
|
|
|
let mut any_pending = false;
|
|
|
let pending_index_file = self.diverted_index();
|
|
|
if self.vfs.exists(&pending_index_file) {
|
|
|
self.vfs.unlink(&pending_index_file)?;
|
|
|
}
|
|
|
self.vfs.copy(&self.index_file, &pending_index_file)?;
|
|
|
if let Some(delayed_buffer) = self.delayed_buffer.take() {
|
|
|
let mut index_file_handle = self.vfs.open(&pending_index_file)?;
|
|
|
index_file_handle
|
|
|
.seek(SeekFrom::End(0))
|
|
|
.when_writing_file(&pending_index_file)?;
|
|
|
let delayed_data =
|
|
|
&delayed_buffer.lock().expect("propagate the panic").buffer;
|
|
|
index_file_handle
|
|
|
.write_all(delayed_data)
|
|
|
.when_writing_file(&pending_index_file)?;
|
|
|
any_pending = true;
|
|
|
}
|
|
|
self.original_index_file = Some(self.index_file.to_owned());
|
|
|
self.index_file = pending_index_file;
|
|
|
Ok((Some(self.index_file.to_owned()), any_pending))
|
|
|
}
|
|
|
|
|
|
/// Overwrite the canonical file with the diverted file, or write out the
|
|
|
/// delayed buffer.
|
|
|
/// Returns an error if the revlog is neither diverted nor delayed.
|
|
|
pub fn finalize_pending(&mut self) -> Result<PathBuf, HgError> {
|
|
|
assert!(!self.is_open());
|
|
|
if self.is_inline() {
|
|
|
return Err(HgError::abort(
|
|
|
"revlog with delayed write should not be inline",
|
|
|
exit_codes::ABORT,
|
|
|
None,
|
|
|
));
|
|
|
}
|
|
|
match (
|
|
|
self.delayed_buffer.as_ref(),
|
|
|
self.original_index_file.as_ref(),
|
|
|
) {
|
|
|
(None, None) => {
|
|
|
return Err(HgError::abort(
|
|
|
"neither delay nor divert found on this revlog",
|
|
|
exit_codes::ABORT,
|
|
|
None,
|
|
|
));
|
|
|
}
|
|
|
(Some(delay), None) => {
|
|
|
let mut index_file_handle = self.vfs.open(&self.index_file)?;
|
|
|
index_file_handle
|
|
|
.seek(SeekFrom::End(0))
|
|
|
.when_writing_file(&self.index_file)?;
|
|
|
index_file_handle
|
|
|
.write_all(
|
|
|
&delay.lock().expect("propagate the panic").buffer,
|
|
|
)
|
|
|
.when_writing_file(&self.index_file)?;
|
|
|
self.delayed_buffer = None;
|
|
|
}
|
|
|
(None, Some(divert)) => {
|
|
|
if self.vfs.exists(&self.index_file) {
|
|
|
self.vfs.rename(&self.index_file, divert, true)?;
|
|
|
}
|
|
|
divert.clone_into(&mut self.index_file);
|
|
|
self.original_index_file = None;
|
|
|
}
|
|
|
(Some(_), Some(_)) => unreachable!(
|
|
|
"{} is in an inconsistent state of both delay and divert",
|
|
|
self.canonical_index_file().display(),
|
|
|
),
|
|
|
}
|
|
|
Ok(self.canonical_index_file())
|
|
|
}
|
|
|
|
|
|
/// `pub` only for `hg-cpython`. This is made a different method than
|
|
|
/// [`Revlog::index`] in case there is a different invariant that pops up
|
|
|
/// later.
|
|
|
#[doc(hidden)]
|
|
|
pub fn shared_index(&self) -> &Index {
|
|
|
&self.index
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/// The use of a [`Refcell`] assumes that a given revlog will only
|
|
|
/// be accessed (read or write) by a single thread.
|
|
|
type UncompressedChunkCache =
|
|
|
RefCell<LruMap<Revision, Arc<[u8]>, ByMemoryUsage>>;
|
|
|
|
|
|
/// The node, revision and data for the last revision we've seen. Speeds up
|
|
|
/// a lot of sequential operations of the revlog.
|
|
|
///
|
|
|
/// The data is not just bytes since it can come from Python and we want to
|
|
|
/// avoid copies if possible.
|
|
|
type SingleRevisionCache =
|
|
|
(Node, Revision, Box<dyn Deref<Target = [u8]> + Send>);
|
|
|
|
|
|
/// A way of progressively filling a buffer with revision data, then return
|
|
|
/// that buffer. Used to abstract away Python-allocated code to reduce copying
|
|
|
/// for performance reasons.
|
|
|
pub trait RevisionBuffer {
|
|
|
/// The owned buffer type to return
|
|
|
type Target;
|
|
|
/// Copies the slice into the buffer
|
|
|
fn extend_from_slice(&mut self, slice: &[u8]);
|
|
|
/// Returns the now finished owned buffer
|
|
|
fn finish(self) -> Self::Target;
|
|
|
}
|
|
|
|
|
|
/// A simple vec-based buffer. This is uselessly complicated for the pure Rust
|
|
|
/// case, but it's the price to pay for Python compatibility.
|
|
|
#[derive(Debug)]
|
|
|
pub(super) struct CoreRevisionBuffer {
|
|
|
buf: Vec<u8>,
|
|
|
}
|
|
|
|
|
|
impl CoreRevisionBuffer {
|
|
|
pub fn new() -> Self {
|
|
|
Self { buf: vec![] }
|
|
|
}
|
|
|
|
|
|
#[inline]
|
|
|
pub fn resize(&mut self, size: usize) {
|
|
|
self.buf.reserve_exact(size - self.buf.capacity());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
impl RevisionBuffer for CoreRevisionBuffer {
|
|
|
type Target = Vec<u8>;
|
|
|
|
|
|
#[inline]
|
|
|
fn extend_from_slice(&mut self, slice: &[u8]) {
|
|
|
self.buf.extend_from_slice(slice);
|
|
|
}
|
|
|
|
|
|
#[inline]
|
|
|
fn finish(self) -> Self::Target {
|
|
|
self.buf
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/// Calculate the hash of a revision given its data and its parents.
|
|
|
pub fn hash(
|
|
|
data: &[u8],
|
|
|
p1_hash: &[u8],
|
|
|
p2_hash: &[u8],
|
|
|
) -> [u8; NODE_BYTES_LENGTH] {
|
|
|
let mut hasher = Sha1::new();
|
|
|
let (a, b) = (p1_hash, p2_hash);
|
|
|
if a > b {
|
|
|
hasher.update(b);
|
|
|
hasher.update(a);
|
|
|
} else {
|
|
|
hasher.update(a);
|
|
|
hasher.update(b);
|
|
|
}
|
|
|
hasher.update(data);
|
|
|
*hasher.finalize().as_ref()
|
|
|
}
|
|
|
|