##// END OF EJS Templates
revlog: update data file record before index rename...
revlog: update data file record before index rename When migrating from inline to non-inline data storage, the data file is recorded initially as zero sized so that it is removed on failure. But the record has to be updated before the index is renamed, otherwise data is lost on rollback. Differential Revision: https://phab.mercurial-scm.org/D10725

File last commit:

r47577:e8ae91b1 default
r48065:46b828b8 default
Show More
revlog.rs
395 lines | 11.9 KiB | application/rls-services+xml | RustLexer
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use std::borrow::Cow;
use std::io::Read;
use std::ops::Deref;
use std::path::Path;
use byteorder::{BigEndian, ByteOrder};
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 use crypto::digest::Digest;
use crypto::sha1::Sha1;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use flate2::read::ZlibDecoder;
use micro_timer::timed;
use zstd;
use super::index::Index;
Simon Sapin
rust: Make NodePrefix allocation-free and Copy, remove NodePrefixRef...
r47160 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
Simon Sapin
rhg: use persistent nodemap when available...
r46706 use super::nodemap;
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 use super::nodemap::{NodeMap, NodeMapError};
Simon Sapin
rhg: use persistent nodemap when available...
r46706 use super::nodemap_docket::NodeMapDocket;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use super::patch;
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 use crate::errors::HgError;
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 use crate::repo::Repo;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use crate::revlog::Revision;
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 #[derive(derive_more::From)]
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 pub enum RevlogError {
InvalidRevision,
Pulkit Goyal
rhg: raise wdir specific error for `hg debugdata`...
r47577 /// Working directory is not supported
WDirUnsupported,
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 /// Found more than one entry whose ID match the requested prefix
AmbiguousPrefix,
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 #[from]
Other(HgError),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 impl From<NodeMapError> for RevlogError {
fn from(error: NodeMapError) -> Self {
match error {
NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
}
}
}
impl RevlogError {
fn corrupted() -> Self {
RevlogError::Other(HgError::corrupted("corrupted revlog"))
Simon Sapin
rust: use the bytes-cast crate to parse persistent nodemaps...
r47119 }
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Read only implementation of revlog.
pub struct Revlog {
/// When index and data are not interleaved: bytes of the revlog index.
/// When index and data are interleaved: bytes of the revlog index and
/// data.
Simon Sapin
rhg: `cat` command: print error messages for missing files...
r47478 pub(crate) index: Index,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// When index and data are not interleaved: bytes of the revlog data
data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
Simon Sapin
rhg: use persistent nodemap when available...
r46706 /// When present on disk: the persistent nodemap for this revlog
nodemap: Option<nodemap::NodeTree>,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
impl Revlog {
/// Open a revlog index file.
///
/// It will also open the associated data file if index and data are not
/// interleaved.
#[timed]
Antoine cezar
hg-core: fix path encoding usage...
r46408 pub fn open(
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 repo: &Repo,
index_path: impl AsRef<Path>,
Antoine cezar
hg-core: fix path encoding usage...
r46408 data_path: Option<&Path>,
) -> Result<Self, RevlogError> {
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 let index_path = index_path.as_ref();
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
let version = get_version(&index_mmap);
if version != 1 {
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 // A proper new version should have had a repo/store requirement.
return Err(RevlogError::corrupted());
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Antoine cezar
hg-core: return Err if `offset != bytes.len()`...
r46176 let index = Index::new(Box::new(index_mmap))?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
Antoine cezar
hg-core: fix path encoding usage...
r46408 let default_data_path = index_path.with_extension("d");
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 // type annotation required
// won't recognize Mmap as Deref<Target = [u8]>
let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 if index.is_inline() {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 None
} else {
Antoine cezar
hg-core: fix path encoding usage...
r46408 let data_path = data_path.unwrap_or(&default_data_path);
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 let data_mmap = repo.store_vfs().mmap_open(data_path)?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 Some(Box::new(data_mmap))
};
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
Simon Sapin
rhg: use persistent nodemap when available...
r46706 |(docket, data)| {
nodemap::NodeTree::load_bytes(
Box::new(data),
docket.data_length,
)
},
);
Ok(Revlog {
index,
data_bytes,
nodemap,
})
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 /// Return number of entries of the `Revlog`.
pub fn len(&self) -> usize {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 self.index.len()
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
/// Returns `true` if the `Revlog` has zero `entries`.
pub fn is_empty(&self) -> bool {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 self.index.is_empty()
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
/// Return the full data associated to a node.
#[timed]
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 pub fn get_node_rev(
&self,
Simon Sapin
rust: Make NodePrefix allocation-free and Copy, remove NodePrefixRef...
r47160 node: NodePrefix,
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 ) -> Result<Revision, RevlogError> {
Simon Sapin
rhg: use persistent nodemap when available...
r46706 if let Some(nodemap) = &self.nodemap {
return nodemap
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 .find_bin(&self.index, node)?
Simon Sapin
rhg: use persistent nodemap when available...
r46706 .ok_or(RevlogError::InvalidRevision);
}
// Fallback to linear scan when a persistent nodemap is not present.
// This happens when the persistent-nodemap experimental feature is not
// enabled, or for small revlogs.
//
// TODO: consider building a non-persistent nodemap in memory to
// optimize these cases.
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 let mut found_by_prefix = None;
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 for rev in (0..self.len() as Revision).rev() {
let index_entry =
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 self.index.get_entry(rev).ok_or(HgError::corrupted(
"revlog references a revision not in the index",
))?;
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 if node == *index_entry.hash() {
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 return Ok(rev);
}
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 if node.is_prefix_of(index_entry.hash()) {
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 if found_by_prefix.is_some() {
return Err(RevlogError::AmbiguousPrefix);
}
found_by_prefix = Some(rev)
}
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 found_by_prefix.ok_or(RevlogError::InvalidRevision)
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
Simon Sapin
rhg: centralize parsing of `--rev` CLI arguments...
r47162 /// Returns whether the given revision exists in this revlog.
pub fn has_rev(&self, rev: Revision) -> bool {
self.index.get_entry(rev).is_some()
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Return the full data associated to a revision.
///
/// All entries required to build the final data out of deltas will be
/// retrieved as needed, and the deltas will be applied to the inital
/// snapshot to rebuild the final data.
#[timed]
pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
// Todo return -> Cow
let mut entry = self.get_entry(rev)?;
let mut delta_chain = vec![];
while let Some(base_rev) = entry.base_rev {
delta_chain.push(entry);
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 entry = self
.get_entry(base_rev)
.map_err(|_| RevlogError::corrupted())?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 // TODO do not look twice in the index
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let index_entry = self
.index
.get_entry(rev)
.ok_or(RevlogError::InvalidRevision)?;
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102
let data: Vec<u8> = if delta_chain.is_empty() {
entry.data()?.into()
} else {
Revlog::build_data_from_deltas(entry, &delta_chain)?
};
if self.check_hash(
index_entry.p1(),
index_entry.p2(),
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 index_entry.hash().as_bytes(),
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 &data,
) {
Ok(data)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 Err(RevlogError::corrupted())
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 /// Check the hash of some given data against the recorded hash.
pub fn check_hash(
&self,
p1: Revision,
p2: Revision,
expected: &[u8],
data: &[u8],
) -> bool {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let e1 = self.index.get_entry(p1);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 let h1 = match e1 {
Some(ref entry) => entry.hash(),
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 None => &NULL_NODE,
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 };
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let e2 = self.index.get_entry(p2);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 let h2 = match e2 {
Some(ref entry) => entry.hash(),
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 None => &NULL_NODE,
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 };
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 }
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Build the full data of a revision out its snapshot
/// and its deltas.
#[timed]
fn build_data_from_deltas(
snapshot: RevlogEntry,
deltas: &[RevlogEntry],
) -> Result<Vec<u8>, RevlogError> {
let snapshot = snapshot.data()?;
let deltas = deltas
.iter()
.rev()
.map(RevlogEntry::data)
.collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
let patches: Vec<_> =
deltas.iter().map(|d| patch::PatchList::new(d)).collect();
let patch = patch::fold_patch_lists(&patches);
Ok(patch.apply(&snapshot))
}
/// Return the revlog data.
fn data(&self) -> &[u8] {
match self.data_bytes {
Some(ref data_bytes) => &data_bytes,
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 None => panic!(
"forgot to load the data or trying to access inline data"
),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
/// Get an entry of the revlog.
fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let index_entry = self
.index
.get_entry(rev)
.ok_or(RevlogError::InvalidRevision)?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let start = index_entry.offset();
let end = start + index_entry.compressed_len();
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let data = if self.index.is_inline() {
self.index.data(start, end)
} else {
&self.data()[start..end]
};
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let entry = RevlogEntry {
rev,
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 bytes: data,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 compressed_len: index_entry.compressed_len(),
uncompressed_len: index_entry.uncompressed_len(),
base_rev: if index_entry.base_revision() == rev {
None
} else {
Some(index_entry.base_revision())
},
};
Ok(entry)
}
}
/// The revlog entry's bytes and the necessary informations to extract
/// the entry's data.
#[derive(Debug)]
pub struct RevlogEntry<'a> {
rev: Revision,
bytes: &'a [u8],
compressed_len: usize,
uncompressed_len: usize,
base_rev: Option<Revision>,
}
impl<'a> RevlogEntry<'a> {
/// Extract the data contained in the entry.
pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
if self.bytes.is_empty() {
return Ok(Cow::Borrowed(&[]));
}
match self.bytes[0] {
// Revision data is the entirety of the entry, including this
// header.
b'\0' => Ok(Cow::Borrowed(self.bytes)),
// Raw revision data follows.
b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
// zlib (RFC 1950) data.
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 // zstd data.
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 // A proper new format should have had a repo/store requirement.
_format_type => Err(RevlogError::corrupted()),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let mut decoder = ZlibDecoder::new(self.bytes);
if self.is_delta() {
let mut buf = Vec::with_capacity(self.compressed_len);
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 decoder
.read_to_end(&mut buf)
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 .map_err(|_| RevlogError::corrupted())?;
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
let mut buf = vec![0; self.uncompressed_len];
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 decoder
.read_exact(&mut buf)
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 .map_err(|_| RevlogError::corrupted())?;
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 if self.is_delta() {
let mut buf = Vec::with_capacity(self.compressed_len);
zstd::stream::copy_decode(self.bytes, &mut buf)
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 .map_err(|_| RevlogError::corrupted())?;
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
let mut buf = vec![0; self.uncompressed_len];
let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 .map_err(|_| RevlogError::corrupted())?;
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 if len != self.uncompressed_len {
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 Err(RevlogError::corrupted())
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 } else {
Ok(buf)
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
/// Tell if the entry is a snapshot or a delta
/// (influences on decompression).
fn is_delta(&self) -> bool {
self.base_rev.is_some()
}
}
/// Format version of the revlog.
pub fn get_version(index_bytes: &[u8]) -> u16 {
BigEndian::read_u16(&index_bytes[2..=3])
}
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 /// Calculate the hash of a revision given its data and its parents.
fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
let mut hasher = Sha1::new();
let (a, b) = (p1_hash, p2_hash);
if a > b {
hasher.input(b);
hasher.input(a);
} else {
hasher.input(a);
hasher.input(b);
}
hasher.input(data);
let mut hash = vec![0; NODE_BYTES_LENGTH];
hasher.result(&mut hash);
hash
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 #[cfg(test)]
mod tests {
use super::*;
use super::super::index::IndexEntryBuilder;
#[test]
fn version_test() {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let bytes = IndexEntryBuilder::new()
.is_first(true)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 .with_version(1)
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 .build();
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
assert_eq!(get_version(&bytes), 1)
}
}