##// END OF EJS Templates
rhg: centralize parsing of `--rev` CLI arguments...
rhg: centralize parsing of `--rev` CLI arguments This new module will be the place to implement more of the revset language when we do so. Differential Revision: https://phab.mercurial-scm.org/D9873

File last commit:

r47162:4b381dbb default
r47162:4b381dbb default
Show More
revlog.rs
387 lines | 11.6 KiB | application/rls-services+xml | RustLexer
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use std::borrow::Cow;
use std::io::Read;
use std::ops::Deref;
use std::path::Path;
use byteorder::{BigEndian, ByteOrder};
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 use crypto::digest::Digest;
use crypto::sha1::Sha1;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use flate2::read::ZlibDecoder;
use micro_timer::timed;
use zstd;
use super::index::Index;
Simon Sapin
rust: Make NodePrefix allocation-free and Copy, remove NodePrefixRef...
r47160 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
Simon Sapin
rhg: use persistent nodemap when available...
r46706 use super::nodemap;
use super::nodemap::NodeMap;
use super::nodemap_docket::NodeMapDocket;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use super::patch;
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 use crate::repo::Repo;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use crate::revlog::Revision;
pub enum RevlogError {
IoError(std::io::Error),
UnsuportedVersion(u16),
InvalidRevision,
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 /// Found more than one entry whose ID match the requested prefix
AmbiguousPrefix,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 Corrupted,
UnknowDataFormat(u8),
}
Simon Sapin
rust: use the bytes-cast crate to parse persistent nodemaps...
r47119 impl From<bytes_cast::FromBytesError> for RevlogError {
fn from(_: bytes_cast::FromBytesError) -> Self {
RevlogError::Corrupted
}
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Read only implementation of revlog.
pub struct Revlog {
/// When index and data are not interleaved: bytes of the revlog index.
/// When index and data are interleaved: bytes of the revlog index and
/// data.
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 index: Index,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// When index and data are not interleaved: bytes of the revlog data
data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
Simon Sapin
rhg: use persistent nodemap when available...
r46706 /// When present on disk: the persistent nodemap for this revlog
nodemap: Option<nodemap::NodeTree>,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
impl Revlog {
/// Open a revlog index file.
///
/// It will also open the associated data file if index and data are not
/// interleaved.
#[timed]
Antoine cezar
hg-core: fix path encoding usage...
r46408 pub fn open(
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 repo: &Repo,
index_path: impl AsRef<Path>,
Antoine cezar
hg-core: fix path encoding usage...
r46408 data_path: Option<&Path>,
) -> Result<Self, RevlogError> {
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 let index_path = index_path.as_ref();
let index_mmap = repo
.store_vfs()
.mmap_open(&index_path)
.map_err(RevlogError::IoError)?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
let version = get_version(&index_mmap);
if version != 1 {
return Err(RevlogError::UnsuportedVersion(version));
}
Antoine cezar
hg-core: return Err if `offset != bytes.len()`...
r46176 let index = Index::new(Box::new(index_mmap))?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
Antoine cezar
hg-core: fix path encoding usage...
r46408 let default_data_path = index_path.with_extension("d");
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 // type annotation required
// won't recognize Mmap as Deref<Target = [u8]>
let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 if index.is_inline() {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 None
} else {
Antoine cezar
hg-core: fix path encoding usage...
r46408 let data_path = data_path.unwrap_or(&default_data_path);
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 let data_mmap = repo
.store_vfs()
.mmap_open(data_path)
.map_err(RevlogError::IoError)?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 Some(Box::new(data_mmap))
};
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 let nodemap = NodeMapDocket::read_from_file(repo, index_path)?.map(
Simon Sapin
rhg: use persistent nodemap when available...
r46706 |(docket, data)| {
nodemap::NodeTree::load_bytes(
Box::new(data),
docket.data_length,
)
},
);
Ok(Revlog {
index,
data_bytes,
nodemap,
})
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 /// Return number of entries of the `Revlog`.
pub fn len(&self) -> usize {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 self.index.len()
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
/// Returns `true` if the `Revlog` has zero `entries`.
pub fn is_empty(&self) -> bool {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 self.index.is_empty()
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
/// Return the full data associated to a node.
#[timed]
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 pub fn get_node_rev(
&self,
Simon Sapin
rust: Make NodePrefix allocation-free and Copy, remove NodePrefixRef...
r47160 node: NodePrefix,
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 ) -> Result<Revision, RevlogError> {
Simon Sapin
rhg: use persistent nodemap when available...
r46706 if let Some(nodemap) = &self.nodemap {
return nodemap
.find_bin(&self.index, node)
// TODO: propagate details of this error:
.map_err(|_| RevlogError::Corrupted)?
.ok_or(RevlogError::InvalidRevision);
}
// Fallback to linear scan when a persistent nodemap is not present.
// This happens when the persistent-nodemap experimental feature is not
// enabled, or for small revlogs.
//
// TODO: consider building a non-persistent nodemap in memory to
// optimize these cases.
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 let mut found_by_prefix = None;
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 for rev in (0..self.len() as Revision).rev() {
let index_entry =
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 if node == *index_entry.hash() {
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 return Ok(rev);
}
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 if node.is_prefix_of(index_entry.hash()) {
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 if found_by_prefix.is_some() {
return Err(RevlogError::AmbiguousPrefix);
}
found_by_prefix = Some(rev)
}
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 found_by_prefix.ok_or(RevlogError::InvalidRevision)
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
Simon Sapin
rhg: centralize parsing of `--rev` CLI arguments...
r47162 /// Returns whether the given revision exists in this revlog.
pub fn has_rev(&self, rev: Revision) -> bool {
self.index.get_entry(rev).is_some()
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Return the full data associated to a revision.
///
/// All entries required to build the final data out of deltas will be
/// retrieved as needed, and the deltas will be applied to the inital
/// snapshot to rebuild the final data.
#[timed]
pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
// Todo return -> Cow
let mut entry = self.get_entry(rev)?;
let mut delta_chain = vec![];
while let Some(base_rev) = entry.base_rev {
delta_chain.push(entry);
Antoine cezar
hg-core: use `.or(Err(Error))` not `.map_err(|_| Error)` (D9100#inline-15067)...
r46177 entry =
self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 // TODO do not look twice in the index
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let index_entry = self
.index
.get_entry(rev)
.ok_or(RevlogError::InvalidRevision)?;
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102
let data: Vec<u8> = if delta_chain.is_empty() {
entry.data()?.into()
} else {
Revlog::build_data_from_deltas(entry, &delta_chain)?
};
if self.check_hash(
index_entry.p1(),
index_entry.p2(),
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 index_entry.hash().as_bytes(),
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 &data,
) {
Ok(data)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 Err(RevlogError::Corrupted)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 /// Check the hash of some given data against the recorded hash.
pub fn check_hash(
&self,
p1: Revision,
p2: Revision,
expected: &[u8],
data: &[u8],
) -> bool {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let e1 = self.index.get_entry(p1);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 let h1 = match e1 {
Some(ref entry) => entry.hash(),
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 None => &NULL_NODE,
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 };
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let e2 = self.index.get_entry(p2);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 let h2 = match e2 {
Some(ref entry) => entry.hash(),
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 None => &NULL_NODE,
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 };
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 }
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Build the full data of a revision out its snapshot
/// and its deltas.
#[timed]
fn build_data_from_deltas(
snapshot: RevlogEntry,
deltas: &[RevlogEntry],
) -> Result<Vec<u8>, RevlogError> {
let snapshot = snapshot.data()?;
let deltas = deltas
.iter()
.rev()
.map(RevlogEntry::data)
.collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?;
let patches: Vec<_> =
deltas.iter().map(|d| patch::PatchList::new(d)).collect();
let patch = patch::fold_patch_lists(&patches);
Ok(patch.apply(&snapshot))
}
/// Return the revlog data.
fn data(&self) -> &[u8] {
match self.data_bytes {
Some(ref data_bytes) => &data_bytes,
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 None => panic!(
"forgot to load the data or trying to access inline data"
),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
/// Get an entry of the revlog.
fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let index_entry = self
.index
.get_entry(rev)
.ok_or(RevlogError::InvalidRevision)?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let start = index_entry.offset();
let end = start + index_entry.compressed_len();
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let data = if self.index.is_inline() {
self.index.data(start, end)
} else {
&self.data()[start..end]
};
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let entry = RevlogEntry {
rev,
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 bytes: data,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 compressed_len: index_entry.compressed_len(),
uncompressed_len: index_entry.uncompressed_len(),
base_rev: if index_entry.base_revision() == rev {
None
} else {
Some(index_entry.base_revision())
},
};
Ok(entry)
}
}
/// The revlog entry's bytes and the necessary informations to extract
/// the entry's data.
#[derive(Debug)]
pub struct RevlogEntry<'a> {
rev: Revision,
bytes: &'a [u8],
compressed_len: usize,
uncompressed_len: usize,
base_rev: Option<Revision>,
}
impl<'a> RevlogEntry<'a> {
/// Extract the data contained in the entry.
pub fn data(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
if self.bytes.is_empty() {
return Ok(Cow::Borrowed(&[]));
}
match self.bytes[0] {
// Revision data is the entirety of the entry, including this
// header.
b'\0' => Ok(Cow::Borrowed(self.bytes)),
// Raw revision data follows.
b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
// zlib (RFC 1950) data.
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 // zstd data.
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 format_type => Err(RevlogError::UnknowDataFormat(format_type)),
}
}
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let mut decoder = ZlibDecoder::new(self.bytes);
if self.is_delta() {
let mut buf = Vec::with_capacity(self.compressed_len);
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 decoder
.read_to_end(&mut buf)
.or(Err(RevlogError::Corrupted))?;
Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
let mut buf = vec![0; self.uncompressed_len];
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 decoder
.read_exact(&mut buf)
.or(Err(RevlogError::Corrupted))?;
Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 if self.is_delta() {
let mut buf = Vec::with_capacity(self.compressed_len);
zstd::stream::copy_decode(self.bytes, &mut buf)
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 .or(Err(RevlogError::Corrupted))?;
Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
let mut buf = vec![0; self.uncompressed_len];
let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 .or(Err(RevlogError::Corrupted))?;
if len != self.uncompressed_len {
Err(RevlogError::Corrupted)
} else {
Ok(buf)
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
/// Tell if the entry is a snapshot or a delta
/// (influences on decompression).
fn is_delta(&self) -> bool {
self.base_rev.is_some()
}
}
/// Format version of the revlog.
pub fn get_version(index_bytes: &[u8]) -> u16 {
BigEndian::read_u16(&index_bytes[2..=3])
}
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 /// Calculate the hash of a revision given its data and its parents.
fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
let mut hasher = Sha1::new();
let (a, b) = (p1_hash, p2_hash);
if a > b {
hasher.input(b);
hasher.input(a);
} else {
hasher.input(a);
hasher.input(b);
}
hasher.input(data);
let mut hash = vec![0; NODE_BYTES_LENGTH];
hasher.result(&mut hash);
hash
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 #[cfg(test)]
mod tests {
use super::*;
use super::super::index::IndexEntryBuilder;
#[test]
fn version_test() {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let bytes = IndexEntryBuilder::new()
.is_first(true)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 .with_version(1)
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 .build();
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
assert_eq!(get_version(&bytes), 1)
}
}