##// END OF EJS Templates
dirstate-v2: complain early on docket name collision...
dirstate-v2: complain early on docket name collision The alternative is that the dirstate gets deleted so the corruption persists and is hard to investigate. This happened to me in tests, where the dirstate names are taken from file, since the file got reverted. I expect this can also happen in prod with non-trivial probability (1/4 billion).

File last commit:

r50825:e98fd81b default
r50992:ca9d65d6 stable
Show More
revlog.rs
644 lines | 19.9 KiB | application/rls-services+xml | RustLexer
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use std::borrow::Cow;
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
r49375 use std::convert::TryFrom;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use std::io::Read;
use std::ops::Deref;
use std::path::Path;
use flate2::read::ZlibDecoder;
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 use sha1::{Digest, Sha1};
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use zstd;
use super::index::Index;
Simon Sapin
rust: Make NodePrefix allocation-free and Copy, remove NodePrefixRef...
r47160 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
Simon Sapin
rhg: use persistent nodemap when available...
r46706 use super::nodemap;
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 use super::nodemap::{NodeMap, NodeMapError};
Simon Sapin
rhg: use persistent nodemap when available...
r46706 use super::nodemap_docket::NodeMapDocket;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use super::patch;
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 use crate::errors::HgError;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use crate::revlog::Revision;
Martin von Zweigbergk
rust-revlog: make unaware of `Repo`...
r49980 use crate::vfs::Vfs;
Simon Sapin
rust: Make private the `index` field of the `Revlog` struct...
r48781 use crate::{Node, NULL_REVISION};
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
Simon Sapin
rhg: desambiguate status without decompressing filelog if possible...
// Per-revision flag bits, as stored in the 16-bit flags field of an index
// entry. Only the four highest bits are defined here.
const REVISION_FLAG_CENSORED: u16 = 0x8000;
const REVISION_FLAG_ELLIPSIS: u16 = 0x4000;
const REVISION_FLAG_EXTSTORED: u16 = 0x2000;
const REVISION_FLAG_HASCOPIESINFO: u16 = 0x1000;

// Union of every flag bit declared above.
//
// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_HASCOPIESINFO
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_CENSORED;

// Flags carried by the synthetic entry built for the null revision: none.
const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
Martin von Zweigbergk
rust-revlog: add tests for p1/p2 getters, as promised in D12442...
r49985 #[derive(Debug, derive_more::From)]
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 pub enum RevlogError {
InvalidRevision,
Pulkit Goyal
rhg: raise wdir specific error for `hg debugdata`...
r47577 /// Working directory is not supported
WDirUnsupported,
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 /// Found more than one entry whose ID match the requested prefix
AmbiguousPrefix,
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 #[from]
Other(HgError),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 impl From<NodeMapError> for RevlogError {
fn from(error: NodeMapError) -> Self {
match error {
NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
format!("nodemap point to revision {} not in index", rev),
),
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 }
}
}
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 }
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 impl RevlogError {
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 fn corrupted<S: AsRef<str>>(context: S) -> Self {
RevlogError::Other(corrupted(context))
Simon Sapin
rust: use the bytes-cast crate to parse persistent nodemaps...
r47119 }
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Read only implementation of revlog.
pub struct Revlog {
/// When index and data are not interleaved: bytes of the revlog index.
/// When index and data are interleaved: bytes of the revlog index and
/// data.
Simon Sapin
rust: Make private the `index` field of the `Revlog` struct...
r48781 index: Index,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// When index and data are not interleaved: bytes of the revlog data
data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
Simon Sapin
rhg: use persistent nodemap when available...
r46706 /// When present on disk: the persistent nodemap for this revlog
nodemap: Option<nodemap::NodeTree>,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
impl Revlog {
/// Open a revlog index file.
///
/// It will also open the associated data file if index and data are not
/// interleaved.
Antoine cezar
hg-core: fix path encoding usage...
r46408 pub fn open(
Martin von Zweigbergk
rust-revlog: make unaware of `Repo`...
r49980 store_vfs: &Vfs,
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 index_path: impl AsRef<Path>,
Antoine cezar
hg-core: fix path encoding usage...
r46408 data_path: Option<&Path>,
Martin von Zweigbergk
rust-revlog: move check for nodemap requirement to caller...
r49978 use_nodemap: bool,
Simon Sapin
rust: Return HgError instead of RevlogError in revlog constructors...
r48777 ) -> Result<Self, HgError> {
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 let index_path = index_path.as_ref();
Arseniy Alekseyev
rhg: do not fail when the repo is empty...
r49013 let index = {
Martin von Zweigbergk
rust-revlog: make unaware of `Repo`...
r49980 match store_vfs.mmap_open_opt(&index_path)? {
Arseniy Alekseyev
rhg: do not fail when the repo is empty...
r49013 None => Index::new(Box::new(vec![])),
Some(index_mmap) => {
let index = Index::new(Box::new(index_mmap))?;
Ok(index)
}
}
}?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
Antoine cezar
hg-core: fix path encoding usage...
r46408 let default_data_path = index_path.with_extension("d");
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 // type annotation required
// won't recognize Mmap as Deref<Target = [u8]>
let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 if index.is_inline() {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 None
} else {
Antoine cezar
hg-core: fix path encoding usage...
r46408 let data_path = data_path.unwrap_or(&default_data_path);
Martin von Zweigbergk
rust-revlog: make unaware of `Repo`...
r49980 let data_mmap = store_vfs.mmap_open(data_path)?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 Some(Box::new(data_mmap))
};
Arseniy Alekseyev
rhg: do not try to open a nodemap for an inline index...
r49040 let nodemap = if index.is_inline() {
None
Martin von Zweigbergk
rust-revlog: move check for nodemap requirement to caller...
r49978 } else if !use_nodemap {
Martin von Zweigbergk
rust-nodemap-docket: move check of nodemap requirement to caller...
r49976 None
Arseniy Alekseyev
rhg: do not try to open a nodemap for an inline index...
r49040 } else {
Martin von Zweigbergk
rust-revlog: make unaware of `Repo`...
r49980 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
Arseniy Alekseyev
rhg: do not try to open a nodemap for an inline index...
r49040 |(docket, data)| {
nodemap::NodeTree::load_bytes(
Box::new(data),
docket.data_length,
)
},
)
};
Simon Sapin
rhg: use persistent nodemap when available...
r46706
Ok(Revlog {
index,
data_bytes,
nodemap,
})
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 /// Return number of entries of the `Revlog`.
pub fn len(&self) -> usize {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 self.index.len()
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
/// Returns `true` if the `Revlog` has zero `entries`.
pub fn is_empty(&self) -> bool {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 self.index.is_empty()
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
Simon Sapin
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev...
r48783 /// Returns the node ID for the given revision number, if it exists in this
/// revlog
Simon Sapin
rust: Make private the `index` field of the `Revlog` struct...
r48781 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
Arseniy Alekseyev
rhg: fix `hg cat` interaction with null revision...
r49050 if rev == NULL_REVISION {
return Some(&NULL_NODE);
}
Simon Sapin
rust: Make private the `index` field of the `Revlog` struct...
r48781 Some(self.index.get_entry(rev)?.hash())
}
Simon Sapin
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev...
r48783 /// Return the revision number for the given node ID, if it exists in this
/// revlog
Simon Sapin
rust: Rename the `Revlog::get_node_rev` method to `rev_from_node`...
r48782 pub fn rev_from_node(
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 &self,
Simon Sapin
rust: Make NodePrefix allocation-free and Copy, remove NodePrefixRef...
r47160 node: NodePrefix,
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 ) -> Result<Revision, RevlogError> {
Simon Sapin
rhg: Align with Python on some revset parsing corner cases...
r48776 if node.is_prefix_of(&NULL_NODE) {
return Ok(NULL_REVISION);
}
Simon Sapin
rhg: use persistent nodemap when available...
r46706 if let Some(nodemap) = &self.nodemap {
return nodemap
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 .find_bin(&self.index, node)?
Simon Sapin
rhg: use persistent nodemap when available...
r46706 .ok_or(RevlogError::InvalidRevision);
}
// Fallback to linear scan when a persistent nodemap is not present.
// This happens when the persistent-nodemap experimental feature is not
// enabled, or for small revlogs.
//
// TODO: consider building a non-persistent nodemap in memory to
// optimize these cases.
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 let mut found_by_prefix = None;
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 for rev in (0..self.len() as Revision).rev() {
let index_entry =
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 self.index.get_entry(rev).ok_or(HgError::corrupted(
"revlog references a revision not in the index",
))?;
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 if node == *index_entry.hash() {
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 return Ok(rev);
}
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 if node.is_prefix_of(index_entry.hash()) {
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 if found_by_prefix.is_some() {
return Err(RevlogError::AmbiguousPrefix);
}
found_by_prefix = Some(rev)
}
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 found_by_prefix.ok_or(RevlogError::InvalidRevision)
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
Simon Sapin
rhg: centralize parsing of `--rev` CLI arguments...
r47162 /// Returns whether the given revision exists in this revlog.
pub fn has_rev(&self, rev: Revision) -> bool {
self.index.get_entry(rev).is_some()
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Return the full data associated to a revision.
///
/// All entries required to build the final data out of deltas will be
/// retrieved as needed, and the deltas will be applied to the inital
/// snapshot to rebuild the final data.
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 pub fn get_rev_data(
&self,
rev: Revision,
) -> Result<Cow<[u8]>, RevlogError> {
Arseniy Alekseyev
rhg: handle null changelog and manifest revisions...
r49012 if rev == NULL_REVISION {
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 return Ok(Cow::Borrowed(&[]));
Arseniy Alekseyev
rhg: handle null changelog and manifest revisions...
r49012 };
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 Ok(self.get_entry(rev)?.data()?)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 /// Check the hash of some given data against the recorded hash.
pub fn check_hash(
&self,
p1: Revision,
p2: Revision,
expected: &[u8],
data: &[u8],
) -> bool {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let e1 = self.index.get_entry(p1);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 let h1 = match e1 {
Some(ref entry) => entry.hash(),
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 None => &NULL_NODE,
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 };
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let e2 = self.index.get_entry(p2);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 let h2 = match e2 {
Some(ref entry) => entry.hash(),
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 None => &NULL_NODE,
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 };
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 }
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Build the full data of a revision out its snapshot
/// and its deltas.
fn build_data_from_deltas(
snapshot: RevlogEntry,
deltas: &[RevlogEntry],
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 ) -> Result<Vec<u8>, HgError> {
Simon Sapin
rhg: Rename some revlog-related types and methods...
r49372 let snapshot = snapshot.data_chunk()?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let deltas = deltas
.iter()
.rev()
Simon Sapin
rhg: Rename some revlog-related types and methods...
r49372 .map(RevlogEntry::data_chunk)
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 .collect::<Result<Vec<_>, _>>()?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let patches: Vec<_> =
deltas.iter().map(|d| patch::PatchList::new(d)).collect();
let patch = patch::fold_patch_lists(&patches);
Ok(patch.apply(&snapshot))
}
/// Return the revlog data.
fn data(&self) -> &[u8] {
match self.data_bytes {
Some(ref data_bytes) => &data_bytes,
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 None => panic!(
"forgot to load the data or trying to access inline data"
),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Arseniy Alekseyev
rhg: correctly handle the case where diffs are encoded relative to nullrev...
r50105 pub fn make_null_entry(&self) -> RevlogEntry {
RevlogEntry {
revlog: self,
rev: NULL_REVISION,
bytes: b"",
compressed_len: 0,
uncompressed_len: 0,
base_rev_or_base_of_delta_chain: None,
p1: NULL_REVISION,
p2: NULL_REVISION,
flags: NULL_REVLOG_ENTRY_FLAGS,
hash: NULL_NODE,
}
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Get an entry of the revlog.
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 pub fn get_entry(
&self,
rev: Revision,
) -> Result<RevlogEntry, RevlogError> {
Arseniy Alekseyev
rhg: correctly handle the case where diffs are encoded relative to nullrev...
r50105 if rev == NULL_REVISION {
return Ok(self.make_null_entry());
}
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let index_entry = self
.index
.get_entry(rev)
.ok_or(RevlogError::InvalidRevision)?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let start = index_entry.offset();
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
r49375 let end = start + index_entry.compressed_len() as usize;
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let data = if self.index.is_inline() {
self.index.data(start, end)
} else {
&self.data()[start..end]
};
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let entry = RevlogEntry {
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 revlog: self,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 rev,
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 bytes: data,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 compressed_len: index_entry.compressed_len(),
uncompressed_len: index_entry.uncompressed_len(),
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
r49289 base_rev_or_base_of_delta_chain: if index_entry
.base_revision_or_base_of_delta_chain()
== rev
{
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 None
} else {
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
r49289 Some(index_entry.base_revision_or_base_of_delta_chain())
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 },
Simon Sapin
rhg: Store p1, p2, and hash in RevlogEntry...
r49376 p1: index_entry.p1(),
p2: index_entry.p2(),
Simon Sapin
rhg: desambiguate status without decompressing filelog if possible...
r49378 flags: index_entry.flags(),
Simon Sapin
rhg: Store p1, p2, and hash in RevlogEntry...
r49376 hash: *index_entry.hash(),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 };
Ok(entry)
}
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
r49289
/// when resolving internal references within revlog, any errors
/// should be reported as corruption, instead of e.g. "invalid revision"
fn get_entry_internal(
&self,
rev: Revision,
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 ) -> Result<RevlogEntry, HgError> {
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 self.get_entry(rev)
.map_err(|_| corrupted(format!("revision {} out of range", rev)))
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
r49289 }
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
/// The revlog entry's bytes and the necessary informations to extract
/// the entry's data.
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 #[derive(Clone)]
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 pub struct RevlogEntry<'a> {
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 revlog: &'a Revlog,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 rev: Revision,
bytes: &'a [u8],
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
r49375 compressed_len: u32,
uncompressed_len: i32,
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
r49289 base_rev_or_base_of_delta_chain: Option<Revision>,
Simon Sapin
rhg: Store p1, p2, and hash in RevlogEntry...
r49376 p1: Revision,
p2: Revision,
Simon Sapin
rhg: desambiguate status without decompressing filelog if possible...
r49378 flags: u16,
Simon Sapin
rhg: Store p1, p2, and hash in RevlogEntry...
r49376 hash: Node,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
impl<'a> RevlogEntry<'a> {
Martin von Zweigbergk
hg-core: silence dead-code warning by adding RevlogEntry::revion() accessor...
r48895 pub fn revision(&self) -> Revision {
self.rev
}
Martin von Zweigbergk
rust-revlog: add methods for getting parent revs and entries...
r49939 pub fn node(&self) -> &Node {
&self.hash
}
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
r49375 pub fn uncompressed_len(&self) -> Option<u32> {
u32::try_from(self.uncompressed_len).ok()
}
Simon Sapin
rhg: desambiguate status without decompressing filelog if possible...
r49378 pub fn has_p1(&self) -> bool {
self.p1 != NULL_REVISION
}
Martin von Zweigbergk
rust-revlog: add methods for getting parent revs and entries...
r49939 pub fn p1_entry(&self) -> Result<Option<RevlogEntry>, RevlogError> {
if self.p1 == NULL_REVISION {
Ok(None)
} else {
Ok(Some(self.revlog.get_entry(self.p1)?))
}
}
/// Entry of the second parent, or `None` when the second parent is the
/// null revision. Fails when the parent cannot be fetched from the revlog.
pub fn p2_entry(&self) -> Result<Option<RevlogEntry>, RevlogError> {
    let parent = self.p2;
    if parent != NULL_REVISION {
        self.revlog.get_entry(parent).map(Some)
    } else {
        Ok(None)
    }
}
/// First parent revision, or `None` when it is the null revision.
pub fn p1(&self) -> Option<Revision> {
    Some(self.p1).filter(|&parent| parent != NULL_REVISION)
}
/// Second parent revision, or `None` when it is the null revision.
pub fn p2(&self) -> Option<Revision> {
    Some(self.p2).filter(|&parent| parent != NULL_REVISION)
}
Arseniy Alekseyev
censor: make rhg fall back to python when encountering a censored node...
r50069 pub fn is_censored(&self) -> bool {
Simon Sapin
rhg: desambiguate status without decompressing filelog if possible...
r49378 (self.flags & REVISION_FLAG_CENSORED) != 0
}
/// Whether any known flag other than ELLIPSIS is set on this entry.
pub fn has_length_affecting_flag_processor(&self) -> bool {
    // Relevant Python code: revlog.size()
    // note: ELLIPSIS is known to not change the content
    const LENGTH_AFFECTING_FLAGS: u16 =
        REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS;
    (self.flags & LENGTH_AFFECTING_FLAGS) != 0
}
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 /// The data for this entry, after resolving deltas if any.
Arseniy Alekseyev
censor: make rhg fall back to python when encountering a censored node...
r50069 pub fn rawdata(&self) -> Result<Cow<'a, [u8]>, HgError> {
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 let mut entry = self.clone();
let mut delta_chain = vec![];
// The meaning of `base_rev_or_base_of_delta_chain` depends on
// generaldelta. See the doc on `ENTRY_DELTA_BASE` in
// `mercurial/revlogutils/constants.py` and the code in
// [_chaininfo] and in [index_deltachain].
let uses_generaldelta = self.revlog.index.uses_generaldelta();
while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
let base_rev = if uses_generaldelta {
base_rev
} else {
entry.rev - 1
};
delta_chain.push(entry);
entry = self.revlog.get_entry_internal(base_rev)?;
}
let data = if delta_chain.is_empty() {
entry.data_chunk()?
} else {
Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
};
Arseniy Alekseyev
censor: make rhg fall back to python when encountering a censored node...
r50069 Ok(data)
}
fn check_data(
&self,
data: Cow<'a, [u8]>,
) -> Result<Cow<'a, [u8]>, HgError> {
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 if self.revlog.check_hash(
Simon Sapin
rhg: Store p1, p2, and hash in RevlogEntry...
r49376 self.p1,
self.p2,
self.hash.as_bytes(),
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 &data,
) {
Ok(data)
} else {
Raphaël Gomès
rhg: fallback when encountering ellipsis revisions...
r50454 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
return Err(HgError::unsupported(
"ellipsis revisions are not supported by rhg",
));
}
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 Err(corrupted(format!(
"hash check failed for revision {}",
self.rev
)))
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 }
}
Arseniy Alekseyev
censor: make rhg fall back to python when encountering a censored node...
r50069 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
let data = self.rawdata()?;
if self.is_censored() {
return Err(HgError::CensoredNodeError);
}
self.check_data(data)
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Extract the data contained in the entry.
Simon Sapin
rhg: Rename some revlog-related types and methods...
r49372 /// This may be a delta. (See `is_delta`.)
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 if self.bytes.is_empty() {
return Ok(Cow::Borrowed(&[]));
}
match self.bytes[0] {
// Revision data is the entirety of the entry, including this
// header.
b'\0' => Ok(Cow::Borrowed(self.bytes)),
// Raw revision data follows.
b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
// zlib (RFC 1950) data.
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 // zstd data.
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 // A proper new format should have had a repo/store requirement.
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 format_type => Err(corrupted(format!(
"unknown compression header '{}'",
format_type
))),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let mut decoder = ZlibDecoder::new(self.bytes);
if self.is_delta() {
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
r49375 let mut buf = Vec::with_capacity(self.compressed_len as usize);
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 decoder
.read_to_end(&mut buf)
.map_err(|e| corrupted(e.to_string()))?;
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
r49375 let cap = self.uncompressed_len.max(0) as usize;
let mut buf = vec![0; cap];
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 decoder
.read_exact(&mut buf)
.map_err(|e| corrupted(e.to_string()))?;
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 if self.is_delta() {
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
r49375 let mut buf = Vec::with_capacity(self.compressed_len as usize);
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 zstd::stream::copy_decode(self.bytes, &mut buf)
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 .map_err(|e| corrupted(e.to_string()))?;
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
r49375 let cap = self.uncompressed_len.max(0) as usize;
let mut buf = vec![0; cap];
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 .map_err(|e| corrupted(e.to_string()))?;
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
r49375 if len != self.uncompressed_len as usize {
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 Err(corrupted("uncompressed length does not match"))
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 } else {
Ok(buf)
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
/// Tell if the entry is a snapshot or a delta
/// (influences on decompression).
fn is_delta(&self) -> bool {
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
r49289 self.base_rev_or_base_of_delta_chain.is_some()
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 /// Calculate the hash of a revision given its data and its parents.
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 fn hash(
data: &[u8],
p1_hash: &[u8],
p2_hash: &[u8],
) -> [u8; NODE_BYTES_LENGTH] {
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 let mut hasher = Sha1::new();
let (a, b) = (p1_hash, p2_hash);
if a > b {
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 hasher.update(b);
hasher.update(a);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 } else {
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 hasher.update(a);
hasher.update(b);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 }
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 hasher.update(data);
*hasher.finalize().as_ref()
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 }
Martin von Zweigbergk
rust-revlog: add tests for p1/p2 getters, as promised in D12442...
r49985
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
    use itertools::Itertools;

    /// Assert that `entry` has revision number `rev` and node ID `node`.
    fn assert_identity(entry: &RevlogEntry, rev: Revision, node: Node) {
        assert_eq!(entry.revision(), rev);
        assert_eq!(*entry.node(), node);
    }

    /// Assert that every parent accessor of `entry` reports no parent.
    fn assert_no_parents(entry: &RevlogEntry) {
        assert!(!entry.has_p1());
        assert_eq!(entry.p1(), None);
        assert_eq!(entry.p2(), None);
        let p1_entry = entry.p1_entry().unwrap();
        assert!(p1_entry.is_none());
        let p2_entry = entry.p2_entry().unwrap();
        assert!(p2_entry.is_none());
    }

    /// An empty index file gives an empty revlog without any revision.
    #[test]
    fn test_empty() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        std::fs::write(temp.path().join("foo.i"), b"").unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0).is_err());
        assert!(!revlog.has_rev(0));
    }

    /// Three entries in an inline revlog: two without parents, and a third
    /// one whose parents are the first two.
    #[test]
    fn test_inline() {
        let temp = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: temp.path() };
        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();
        let entry0_bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let entry1_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let entry2_bytes = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_p1(0)
            .with_p2(1)
            .with_node(node2)
            .build();
        let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(temp.path().join("foo.i"), contents).unwrap();
        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        let entry0 = revlog.get_entry(0).ok().unwrap();
        assert_identity(&entry0, 0, node0);
        assert_no_parents(&entry0);

        let entry1 = revlog.get_entry(1).ok().unwrap();
        assert_identity(&entry1, 1, node1);
        assert_no_parents(&entry1);

        let entry2 = revlog.get_entry(2).ok().unwrap();
        assert_identity(&entry2, 2, node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(0));
        assert_eq!(entry2.p2(), Some(1));
        let p1_entry = entry2.p1_entry().unwrap();
        assert!(p1_entry.is_some());
        assert_eq!(p1_entry.unwrap().revision(), 0);
        let p2_entry = entry2.p2_entry().unwrap();
        assert!(p2_entry.is_some());
        assert_eq!(p2_entry.unwrap().revision(), 1);
    }
}