##// END OF EJS Templates
worker: avoid reading 1 byte at a time from the OS pipe...
worker: avoid reading 1 byte at a time from the OS pipe Apparently `pickle.load` does a lot of small reads, many of them literally 1-byte, so it benefits greatly from buffering. This change enables the buffering, at the cost of more complicated interaction with the `selector` API. On one repository with ~400k files this reduces the time by about ~30s, from ~60 to ~30s. The difference is so large because the actual updating work is parallelized, while these small reads are bottlenecking the central hg process.

File last commit:

r50530:a5447a4a default
r50794:3eef8baf default
Show More
revlog.rs
643 lines | 19.9 KiB | application/rls-services+xml | RustLexer
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use std::borrow::Cow;
use std::io::Read;
use std::ops::Deref;
use std::path::Path;
use flate2::read::ZlibDecoder;
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 use sha1::{Digest, Sha1};
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use zstd;
use super::index::Index;
Simon Sapin
rust: Make NodePrefix allocation-free and Copy, remove NodePrefixRef...
r47160 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
Simon Sapin
rhg: use persistent nodemap when available...
r46706 use super::nodemap;
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 use super::nodemap::{NodeMap, NodeMapError};
Simon Sapin
rhg: use persistent nodemap when available...
r46706 use super::nodemap_docket::NodeMapDocket;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use super::patch;
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 use crate::errors::HgError;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 use crate::revlog::Revision;
Martin von Zweigbergk
rust-revlog: make unaware of `Repo`...
r49980 use crate::vfs::Vfs;
Simon Sapin
rust: Make private the `index` field of the `Revlog` struct...
r48781 use crate::{Node, NULL_REVISION};
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
Simon Sapin
rhg: desambiguate status without decompressing filelog if possible...
r49378 const REVISION_FLAG_CENSORED: u16 = 1 << 15;
const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;
// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
| REVISION_FLAG_ELLIPSIS
| REVISION_FLAG_EXTSTORED
| REVISION_FLAG_HASCOPIESINFO;
Arseniy Alekseyev
rhg: correctly handle the case where diffs are encoded relative to nullrev...
r50105 const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
Martin von Zweigbergk
rust-revlog: add tests for p1/p2 getters, as promised in D12442...
r49985 #[derive(Debug, derive_more::From)]
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
/// Error type returned by the public lookup and read methods of `Revlog`.
r46097 pub enum RevlogError {
/// Returned when a revision number or node is not found in the index.
InvalidRevision,
Pulkit Goyal
rhg: raise wdir specific error for `hg debugdata`...
r47577 /// Working directory is not supported
WDirUnsupported,
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 /// Found more than one entry whose ID match the requested prefix
AmbiguousPrefix,
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 #[from]
// Any other failure, wrapped as an `HgError`; the `#[from]` attribute above
// derives the `From<HgError>` conversion.
Other(HgError),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Simon Sapin
rust: use HgError in RevlogError and Vfs...
// Translates nodemap lookup failures into `RevlogError` values.
r47172 impl From<NodeMapError> for RevlogError {
fn from(error: NodeMapError) -> Self {
match error {
// More than one node matched the lookup: surface it as an ambiguous prefix.
NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
Raphaël Gomès
rust: don't swallow valuable error information...
// The nodemap named a revision the index does not have; this is reported
// as corruption and the message carries the offending revision.
r50269 NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
format!("nodemap point to revision {} not in index", rev),
),
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 }
}
}
Raphaël Gomès
rust: don't swallow valuable error information...
/// Builds an `HgError` describing revlog corruption: the message is
/// `context` prefixed with "corrupted revlog, ".
r50269 fn corrupted<S: AsRef<str>>(context: S) -> HgError {
HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 }
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 impl RevlogError {
Raphaël Gomès
rust: don't swallow valuable error information...
/// Same as the free function `corrupted`, with the resulting `HgError`
/// wrapped in `RevlogError::Other`.
r50269 fn corrupted<S: AsRef<str>>(context: S) -> Self {
RevlogError::Other(corrupted(context))
Simon Sapin
rust: use the bytes-cast crate to parse persistent nodemaps...
r47119 }
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Read only implementation of revlog.
pub struct Revlog {
/// When index and data are not interleaved: bytes of the revlog index.
/// When index and data are interleaved: bytes of the revlog index and
/// data.
Simon Sapin
rust: Make private the `index` field of the `Revlog` struct...
r48781 index: Index,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// When index and data are not interleaved: bytes of the revlog data
// `Revlog::open` stores `None` here when `index.is_inline()` is true.
data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
Simon Sapin
rhg: use persistent nodemap when available...
r46706 /// When present on disk: the persistent nodemap for this revlog
// `Revlog::open` also leaves this as `None` for inline indexes and when
// its `use_nodemap` argument is false.
nodemap: Option<nodemap::NodeTree>,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
impl Revlog {
/// Open a revlog index file.
///
/// It will also open the associated data file if index and data are not
/// interleaved.
Antoine cezar
hg-core: fix path encoding usage...
// Parameters:
// - `store_vfs`: used to memory-map the index and data files and to read
//   the nodemap docket.
// - `index_path`: path of the index file, passed to `store_vfs`.
// - `data_path`: explicit data file path; when `None`, `index_path` with
//   its extension replaced by `d` is used.
// - `use_nodemap`: when false, no persistent nodemap is looked up.
// Any failure from the vfs, `Index::new` or the docket read is propagated
// as an `HgError`.
r46408 pub fn open(
Martin von Zweigbergk
rust-revlog: make unaware of `Repo`...
r49980 store_vfs: &Vfs,
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 index_path: impl AsRef<Path>,
Antoine cezar
hg-core: fix path encoding usage...
r46408 data_path: Option<&Path>,
Martin von Zweigbergk
rust-revlog: move check for nodemap requirement to caller...
r49978 use_nodemap: bool,
Simon Sapin
rust: Return HgError instead of RevlogError in revlog constructors...
r48777 ) -> Result<Self, HgError> {
Simon Sapin
rust: introduce Repo and Vfs types for filesystem abstraction...
r46782 let index_path = index_path.as_ref();
Arseniy Alekseyev
rhg: do not fail when the repo is empty...
r49013 let index = {
Martin von Zweigbergk
rust-revlog: make unaware of `Repo`...
r49980 match store_vfs.mmap_open_opt(&index_path)? {
Arseniy Alekseyev
rhg: do not fail when the repo is empty...
// No mapping available: build the index from an empty byte vector instead
// of failing. NOTE(review): presumably `None` means the index file does
// not exist -- confirm against `Vfs::mmap_open_opt`.
r49013 None => Index::new(Box::new(vec![])),
Some(index_mmap) => {
let index = Index::new(Box::new(index_mmap))?;
Ok(index)
}
}
}?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097
Antoine cezar
hg-core: fix path encoding usage...
// Fallback data path: the index path with its extension replaced by `d`.
r46408 let default_data_path = index_path.with_extension("d");
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 // type annotation required
// won't recognize Mmap as Deref<Target = [u8]>
let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
// An inline index has no separate data file to map.
r46175 if index.is_inline() {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 None
} else {
Antoine cezar
hg-core: fix path encoding usage...
r46408 let data_path = data_path.unwrap_or(&default_data_path);
Martin von Zweigbergk
rust-revlog: make unaware of `Repo`...
// Unlike the index, the data file is opened with the non-optional
// `mmap_open`, whose error is propagated.
r49980 let data_mmap = store_vfs.mmap_open(data_path)?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 Some(Box::new(data_mmap))
};
Arseniy Alekseyev
rhg: do not try to open a nodemap for an inline index...
// The nodemap is only loaded for a non-inline index and when the caller
// asked for it; otherwise lookups fall back to scanning the index.
r49040 let nodemap = if index.is_inline() {
None
Martin von Zweigbergk
rust-revlog: move check for nodemap requirement to caller...
r49978 } else if !use_nodemap {
Martin von Zweigbergk
rust-nodemap-docket: move check of nodemap requirement to caller...
r49976 None
Arseniy Alekseyev
rhg: do not try to open a nodemap for an inline index...
r49040 } else {
Martin von Zweigbergk
rust-revlog: make unaware of `Repo`...
// `read_from_file` yields an `Option`; when it is `None` the nodemap
// stays `None` as well.
r49980 NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
Arseniy Alekseyev
rhg: do not try to open a nodemap for an inline index...
r49040 |(docket, data)| {
nodemap::NodeTree::load_bytes(
Box::new(data),
docket.data_length,
)
},
)
};
Simon Sapin
rhg: use persistent nodemap when available...
r46706
Ok(Revlog {
index,
data_bytes,
nodemap,
})
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 /// Return number of entries of the `Revlog`.
pub fn len(&self) -> usize {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
// The count is whatever the index reports.
r46175 self.index.len()
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
/// Returns `true` if the `Revlog` has zero `entries`.
pub fn is_empty(&self) -> bool {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
// Delegates to the index, like `len`.
r46175 self.index.is_empty()
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
Simon Sapin
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev...
r48783 /// Returns the node ID for the given revision number, if it exists in this
/// revlog
Simon Sapin
rust: Make private the `index` field of the `Revlog` struct...
r48781 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
Arseniy Alekseyev
rhg: fix `hg cat` interaction with null revision...
// The null revision is answered directly with the null node, without
// consulting the index.
r49050 if rev == NULL_REVISION {
return Some(&NULL_NODE);
}
Simon Sapin
rust: Make private the `index` field of the `Revlog` struct...
// `?` turns a missing index entry into `None` for the caller.
r48781 Some(self.index.get_entry(rev)?.hash())
}
Simon Sapin
rust: Rename get_node methods to data_for_node, get_rev to data_for_rev...
r48783 /// Return the revision number for the given node ID, if it exists in this
/// revlog
Simon Sapin
rust: Rename the `Revlog::get_node_rev` method to `rev_from_node`...
// Errors: `InvalidRevision` when nothing matches, `AmbiguousPrefix` when
// several entries match the prefix, and `Other` for failures coming from
// the nodemap or for a revision missing from the index during the scan.
r48782 pub fn rev_from_node(
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 &self,
Simon Sapin
rust: Make NodePrefix allocation-free and Copy, remove NodePrefixRef...
r47160 node: NodePrefix,
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 ) -> Result<Revision, RevlogError> {
Simon Sapin
rhg: Align with Python on some revset parsing corner cases...
// Any prefix of the null node resolves to the null revision before the
// nodemap or the index is consulted.
r48776 if node.is_prefix_of(&NULL_NODE) {
return Ok(NULL_REVISION);
}
Simon Sapin
rhg: use persistent nodemap when available...
// Fast path: with a nodemap loaded, its answer is final; a lookup that
// finds nothing becomes `InvalidRevision`.
r46706 if let Some(nodemap) = &self.nodemap {
return nodemap
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 .find_bin(&self.index, node)?
Simon Sapin
rhg: use persistent nodemap when available...
r46706 .ok_or(RevlogError::InvalidRevision);
}
// Fallback to linear scan when a persistent nodemap is not present.
// This happens when the persistent-nodemap experimental feature is not
// enabled, or for small revlogs.
//
// TODO: consider building a non-persistent nodemap in memory to
// optimize these cases.
Simon Sapin
rhg: allow specifying a changeset ID prefix...
r46646 let mut found_by_prefix = None;
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
// The scan runs from the highest revision down to 0.
r46105 for rev in (0..self.len() as Revision).rev() {
let index_entry =
Simon Sapin
rust: use HgError in RevlogError and Vfs...
// NOTE(review): `ok_or` evaluates its argument on every iteration, so the
// error value is built even when the entry exists; `ok_or_else` would
// defer that to the failure path.
r47172 self.index.get_entry(rev).ok_or(HgError::corrupted(
"revlog references a revision not in the index",
))?;
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
// An entry that compares equal to `node` ends the search immediately,
// even if an earlier iteration recorded a prefix match.
r46647 if node == *index_entry.hash() {
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 return Ok(rev);
}
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 if node.is_prefix_of(index_entry.hash()) {
Simon Sapin
rhg: allow specifying a changeset ID prefix...
// A second prefix match means the prefix does not identify one entry.
r46646 if found_by_prefix.is_some() {
return Err(RevlogError::AmbiguousPrefix);
}
found_by_prefix = Some(rev)
}
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
Simon Sapin
rhg: allow specifying a changeset ID prefix...
// Either the single prefix match, or `InvalidRevision` if none was seen.
r46646 found_by_prefix.ok_or(RevlogError::InvalidRevision)
Antoine Cezar
hg-core: add `Revlog.get_node_rev`...
r46105 }
Simon Sapin
rhg: centralize parsing of `--rev` CLI arguments...
r47162 /// Returns whether the given revision exists in this revlog.
pub fn has_rev(&self, rev: Revision) -> bool {
    // The revision exists exactly when the index can produce an entry for it.
    let entry = self.index.get_entry(rev);
    entry.is_some()
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Return the full data associated to a revision.
///
/// All entries required to build the final data out of deltas will be
/// retrieved as needed, and the deltas will be applied to the initial
/// snapshot to rebuild the final data.
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 pub fn get_rev_data(
&self,
rev: Revision,
) -> Result<Cow<[u8]>, RevlogError> {
Arseniy Alekseyev
rhg: handle null changelog and manifest revisions...
// The null revision has no stored data: answer with an empty borrowed
// slice without touching the index.
r49012 if rev == NULL_REVISION {
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 return Ok(Cow::Borrowed(&[]));
Arseniy Alekseyev
rhg: handle null changelog and manifest revisions...
r49012 };
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
// `RevlogEntry::data` resolves the delta chain, rejects censored entries
// and verifies the hash; its `HgError` is converted by the second `?`.
r49374 Ok(self.get_entry(rev)?.data()?)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 /// Check the hash of some given data against the recorded hash.
// Returns true when the hash computed by `hash` over `data` and the two
// parents' node hashes equals `expected`.
pub fn check_hash(
&self,
p1: Revision,
p2: Revision,
expected: &[u8],
data: &[u8],
) -> bool {
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let e1 = self.index.get_entry(p1);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
// A parent with no index entry is hashed as the null node.
r46102 let h1 = match e1 {
Some(ref entry) => entry.hash(),
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 None => &NULL_NODE,
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 };
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let e2 = self.index.get_entry(p2);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
// Same fallback for the second parent.
r46102 let h2 = match e2 {
Some(ref entry) => entry.hash(),
Simon Sapin
rust: use NodePrefix::from_hex instead of hex::decode directly...
r46647 None => &NULL_NODE,
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 };
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 }
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Build the full data of a revision out of its snapshot
/// and its deltas.
// `snapshot` is the entry the chain starts from; `deltas` is the chain as
// collected by `RevlogEntry::rawdata`, i.e. the requested entry first and
// the entry next to the snapshot last.
fn build_data_from_deltas(
snapshot: RevlogEntry,
deltas: &[RevlogEntry],
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 ) -> Result<Vec<u8>, HgError> {
Simon Sapin
rhg: Rename some revlog-related types and methods...
r49372 let snapshot = snapshot.data_chunk()?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
// Reversing puts the delta next to the snapshot first; each chunk is
// decoded, and the first decoding error aborts the whole collection.
r46097 let deltas = deltas
.iter()
.rev()
Simon Sapin
rhg: Rename some revlog-related types and methods...
r49372 .map(RevlogEntry::data_chunk)
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 .collect::<Result<Vec<_>, _>>()?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
// The decoded chunks are parsed as patch lists, folded into a single
// patch, and that patch is applied to the snapshot bytes.
r46097 let patches: Vec<_> =
deltas.iter().map(|d| patch::PatchList::new(d)).collect();
let patch = patch::fold_patch_lists(&patches);
Ok(patch.apply(&snapshot))
}
/// Return the revlog data.
// Panics when `data_bytes` is `None`; `get_entry` only calls this when the
// index is not inline.
fn data(&self) -> &[u8] {
match self.data_bytes {
Some(ref data_bytes) => &data_bytes,
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 None => panic!(
"forgot to load the data or trying to access inline data"
),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Arseniy Alekseyev
rhg: correctly handle the case where diffs are encoded relative to nullrev...
/// Builds the entry used for the null revision: empty bytes, zero lengths,
/// no delta base, null parents, no flags and the null node as hash.
r50105 pub fn make_null_entry(&self) -> RevlogEntry {
RevlogEntry {
revlog: self,
rev: NULL_REVISION,
bytes: b"",
compressed_len: 0,
uncompressed_len: 0,
base_rev_or_base_of_delta_chain: None,
p1: NULL_REVISION,
p2: NULL_REVISION,
flags: NULL_REVLOG_ENTRY_FLAGS,
hash: NULL_NODE,
}
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Get an entry of the revlog.
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
// Returns `RevlogError::InvalidRevision` when the index has no entry for
// `rev`; the null revision always succeeds.
r49374 pub fn get_entry(
&self,
rev: Revision,
) -> Result<RevlogEntry, RevlogError> {
Arseniy Alekseyev
rhg: correctly handle the case where diffs are encoded relative to nullrev...
// The null revision is not stored in the index; a synthetic entry is
// returned for it.
r50105 if rev == NULL_REVISION {
return Ok(self.make_null_entry());
}
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 let index_entry = self
.index
.get_entry(rev)
.ok_or(RevlogError::InvalidRevision)?;
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let start = index_entry.offset();
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
// NOTE(review): the addition and the slicing below are unchecked; offsets
// from a damaged index that fall outside the data would panic in the
// slice indexing instead of returning an error.
r49375 let end = start + index_entry.compressed_len() as usize;
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
// Inline revlogs keep the chunk inside the index bytes; otherwise it is
// sliced out of the separate data file.
r46175 let data = if self.index.is_inline() {
self.index.data(start, end)
} else {
&self.data()[start..end]
};
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let entry = RevlogEntry {
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 revlog: self,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 rev,
Antoine cezar
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)...
r46175 bytes: data,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 compressed_len: index_entry.compressed_len(),
uncompressed_len: index_entry.uncompressed_len(),
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
// An entry whose recorded base is itself is stored as `None`, which
// `is_delta` later reads as "not a delta".
r49289 base_rev_or_base_of_delta_chain: if index_entry
.base_revision_or_base_of_delta_chain()
== rev
{
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 None
} else {
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
r49289 Some(index_entry.base_revision_or_base_of_delta_chain())
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 },
Simon Sapin
rhg: Store p1, p2, and hash in RevlogEntry...
r49376 p1: index_entry.p1(),
p2: index_entry.p2(),
Simon Sapin
rhg: desambiguate status without decompressing filelog if possible...
r49378 flags: index_entry.flags(),
Simon Sapin
rhg: Store p1, p2, and hash in RevlogEntry...
r49376 hash: *index_entry.hash(),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 };
Ok(entry)
}
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
r49289
/// when resolving internal references within revlog, any errors
/// should be reported as corruption, instead of e.g. "invalid revision"
fn get_entry_internal(
&self,
rev: Revision,
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
r49374 ) -> Result<RevlogEntry, HgError> {
Raphaël Gomès
rust: don't swallow valuable error information...
// Whatever error `get_entry` returned is discarded and replaced by a
// corruption error that names the revision.
r50269 self.get_entry(rev)
.map_err(|_| corrupted(format!("revision {} out of range", rev)))
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
r49289 }
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
/// The revlog entry's bytes and the necessary information to extract
/// the entry's data.
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 #[derive(Clone)]
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 pub struct RevlogEntry<'a> {
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
// The revlog this entry was read from; used to follow delta chains and to
// look up parent entries.
r49373 revlog: &'a Revlog,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
// Revision number of this entry (`NULL_REVISION` for the null entry).
r46097 rev: Revision,
// The stored chunk for this revision, still in its on-disk encoding; see
// `data_chunk` for how the first byte is interpreted.
bytes: &'a [u8],
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
// Lengths as reported by the index entry. `uncompressed_len` is signed;
// the public accessor maps values that do not fit in a `u32` to `None`.
r49375 compressed_len: u32,
uncompressed_len: i32,
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
// `None` when the index records this entry as its own base, otherwise the
// value recorded by the index (its meaning depends on generaldelta, see
// `rawdata`).
r49289 base_rev_or_base_of_delta_chain: Option<Revision>,
Simon Sapin
rhg: Store p1, p2, and hash in RevlogEntry...
// Parent revisions; `NULL_REVISION` stands for "no parent".
r49376 p1: Revision,
p2: Revision,
Simon Sapin
rhg: desambiguate status without decompressing filelog if possible...
// Raw flag bits from the index entry (see the `REVISION_FLAG_*` constants).
r49378 flags: u16,
Simon Sapin
rhg: Store p1, p2, and hash in RevlogEntry...
// Node hash recorded in the index for this revision.
r49376 hash: Node,
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
impl<'a> RevlogEntry<'a> {
Martin von Zweigbergk
hg-core: silence dead-code warning by adding RevlogEntry::revion() accessor...
/// Returns the revision number of this entry.
r48895 pub fn revision(&self) -> Revision {
self.rev
}
Martin von Zweigbergk
rust-revlog: add methods for getting parent revs and entries...
/// Returns the node hash stored for this entry.
r49939 pub fn node(&self) -> &Node {
&self.hash
}
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
/// Returns the recorded uncompressed length, or `None` when the stored
/// signed value cannot be represented as a `u32` (i.e. it is negative).
r49375 pub fn uncompressed_len(&self) -> Option<u32> {
u32::try_from(self.uncompressed_len).ok()
}
Simon Sapin
rhg: desambiguate status without decompressing filelog if possible...
/// Returns `true` when the first parent is not the null revision.
r49378 pub fn has_p1(&self) -> bool {
self.p1 != NULL_REVISION
}
Martin von Zweigbergk
rust-revlog: add methods for getting parent revs and entries...
/// Returns the entry of the first parent, `Ok(None)` when the first parent
/// is the null revision, or the error from `Revlog::get_entry`.
r49939 pub fn p1_entry(&self) -> Result<Option<RevlogEntry>, RevlogError> {
if self.p1 == NULL_REVISION {
Ok(None)
} else {
Ok(Some(self.revlog.get_entry(self.p1)?))
}
}
/// Returns the entry of the second parent, `Ok(None)` when the second
/// parent is the null revision, or the error from `Revlog::get_entry`.
pub fn p2_entry(&self) -> Result<Option<RevlogEntry>, RevlogError> {
    // A null second parent means there is nothing to look up.
    if self.p2 == NULL_REVISION {
        return Ok(None);
    }
    self.revlog.get_entry(self.p2).map(Some)
}
/// Returns the first parent revision, or `None` when it is the null
/// revision.
pub fn p1(&self) -> Option<Revision> {
    (self.p1 != NULL_REVISION).then(|| self.p1)
}
/// Returns the second parent revision, or `None` when it is the null
/// revision.
pub fn p2(&self) -> Option<Revision> {
    (self.p2 != NULL_REVISION).then(|| self.p2)
}
Arseniy Alekseyev
censor: make rhg fall back to python when encountering a censored node...
/// Returns `true` when the `REVISION_FLAG_CENSORED` bit is set in this
/// entry's flags.
r50069 pub fn is_censored(&self) -> bool {
Simon Sapin
rhg: desambiguate status without decompressing filelog if possible...
r49378 (self.flags & REVISION_FLAG_CENSORED) != 0
}
/// Returns `true` when any known flag other than ELLIPSIS is set on this
/// entry.
pub fn has_length_affecting_flag_processor(&self) -> bool {
    // Relevant Python code: revlog.size()
    // note: ELLIPSIS is known to not change the content
    let length_affecting_flags = REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS;
    (self.flags & length_affecting_flags) != 0
}
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 /// The data for this entry, after resolving deltas if any.
Arseniy Alekseyev
censor: make rhg fall back to python when encountering a censored node...
// No censorship or hash check happens here; `data` layers those on top.
r50069 pub fn rawdata(&self) -> Result<Cow<'a, [u8]>, HgError> {
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 let mut entry = self.clone();
let mut delta_chain = vec![];
// The meaning of `base_rev_or_base_of_delta_chain` depends on
// generaldelta. See the doc on `ENTRY_DELTA_BASE` in
// `mercurial/revlogutils/constants.py` and the code in
// [_chaininfo] and in [index_deltachain].
let uses_generaldelta = self.revlog.index.uses_generaldelta();
// Walk back from this entry until an entry without a base is reached,
// collecting every delta on the way (this entry first).
while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
// Without generaldelta the stored value is not used as the next hop: the
// chain steps to the previous revision instead.
let base_rev = if uses_generaldelta {
base_rev
} else {
entry.rev - 1
};
delta_chain.push(entry);
// A failed lookup here is reported as corruption by `get_entry_internal`.
entry = self.revlog.get_entry_internal(base_rev)?;
}
// With no deltas collected, the entry's own chunk is the full data;
// otherwise `entry` is the snapshot the deltas are applied to.
let data = if delta_chain.is_empty() {
entry.data_chunk()?
} else {
Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
};
Arseniy Alekseyev
censor: make rhg fall back to python when encountering a censored node...
r50069 Ok(data)
}
// Verifies `data` against this entry's recorded hash and returns it
// unchanged on success. On mismatch the error is "unsupported" when the
// ELLIPSIS flag is set, and a corruption error naming the revision
// otherwise.
fn check_data(
&self,
data: Cow<'a, [u8]>,
) -> Result<Cow<'a, [u8]>, HgError> {
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 if self.revlog.check_hash(
Simon Sapin
rhg: Store p1, p2, and hash in RevlogEntry...
r49376 self.p1,
self.p2,
self.hash.as_bytes(),
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 &data,
) {
Ok(data)
} else {
Raphaël Gomès
rhg: fallback when encountering ellipsis revisions...
// The ELLIPSIS flag is only consulted after the hash check has failed.
r50454 if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
return Err(HgError::unsupported(
"ellipsis revisions are not supported by rhg",
));
}
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 Err(corrupted(format!(
"hash check failed for revision {}",
self.rev
)))
Simon Sapin
rhg: Add RevlogEntry::data that does delta resolution...
r49373 }
}
Arseniy Alekseyev
censor: make rhg fall back to python when encountering a censored node...
/// Returns the data of this entry after delta resolution, failing with
/// `HgError::CensoredNodeError` for censored entries and with the error
/// from `check_data` when the hash does not match.
r50069 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
// The raw data is resolved first, so an error from `rawdata` is returned
// before the censored flag is looked at.
let data = self.rawdata()?;
if self.is_censored() {
return Err(HgError::CensoredNodeError);
}
self.check_data(data)
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 /// Extract the data contained in the entry.
Simon Sapin
rhg: Rename some revlog-related types and methods...
r49372 /// This may be a delta. (See `is_delta`.)
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
// The first byte of the stored chunk selects how it is decoded; borrowed
// data is returned when no decompression is needed.
r49374 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
// An empty chunk decodes to empty data; this also avoids indexing byte 0.
r46097 if self.bytes.is_empty() {
return Ok(Cow::Borrowed(&[]));
}
match self.bytes[0] {
// Revision data is the entirety of the entry, including this
// header.
b'\0' => Ok(Cow::Borrowed(self.bytes)),
// Raw revision data follows.
b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
// zlib (RFC 1950) data.
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 // zstd data.
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
Simon Sapin
rust: use HgError in RevlogError and Vfs...
r47172 // A proper new format should have had a repo/store requirement.
Raphaël Gomès
rust: don't swallow valuable error information...
// `format_type` is a byte, so the message shows it as a decimal number
// rather than as a character.
r50269 format_type => Err(corrupted(format!(
"unknown compression header '{}'",
format_type
))),
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
// Decompresses the whole chunk as zlib; any I/O error from the decoder is
// reported as revlog corruption.
r49374 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 let mut decoder = ZlibDecoder::new(self.bytes);
if self.is_delta() {
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
// Delta branch: `uncompressed_len` is not consulted; the output is read
// to the end, with the compressed length as the initial capacity.
r49375 let mut buf = Vec::with_capacity(self.compressed_len as usize);
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 decoder
.read_to_end(&mut buf)
.map_err(|e| corrupted(e.to_string()))?;
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
// Non-delta branch: the buffer has exactly `uncompressed_len` bytes (a
// negative length is clamped to 0) and must be filled completely.
// NOTE(review): `read_exact` stops once the buffer is full, so extra
// decompressed bytes beyond that length would not be noticed here.
r49375 let cap = self.uncompressed_len.max(0) as usize;
let mut buf = vec![0; cap];
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 decoder
.read_exact(&mut buf)
.map_err(|e| corrupted(e.to_string()))?;
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Simon Sapin
rhg: Expose FilelogEntry that wraps RevlogEntry...
// Decompresses the whole chunk as zstd; decoder errors and a length
// mismatch are both reported as revlog corruption.
r49374 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 if self.is_delta() {
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
// Delta branch: stream-decode into a growable buffer; the compressed
// length is only the initial capacity.
r49375 let mut buf = Vec::with_capacity(self.compressed_len as usize);
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 zstd::stream::copy_decode(self.bytes, &mut buf)
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 .map_err(|e| corrupted(e.to_string()))?;
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 Ok(buf)
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 } else {
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
// Non-delta branch: decode into a buffer of `uncompressed_len` bytes (a
// negative length is clamped to 0).
r49375 let cap = self.uncompressed_len.max(0) as usize;
let mut buf = vec![0; cap];
Raphaël Gomès
hg-core: upgrade `zstd` dependency...
r50530 let len = zstd::bulk::decompress_to_buffer(self.bytes, &mut buf)
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 .map_err(|e| corrupted(e.to_string()))?;
Simon Sapin
rhg: RevlogEntry::uncompressed_len is signed...
// The length returned by the decoder must match the length recorded in
// the index.
r49375 if len != self.uncompressed_len as usize {
Raphaël Gomès
rust: don't swallow valuable error information...
r50269 Err(corrupted("uncompressed length does not match"))
Antoine cezar
hg-core: return `Err` on decompression error (D8958#inline-15004 followup)...
r46169 } else {
Ok(buf)
}
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
/// Tell if the entry is a snapshot or a delta
/// (influences on decompression).
fn is_delta(&self) -> bool {
Arseniy Alekseyev
rhg: fix a crash on non-generaldelta revlogs...
// `get_entry` stores `None` when the index records the entry as its own
// base, so a present base marks a delta.
r49289 self.base_rev_or_base_of_delta_chain.is_some()
Antoine Cezar
hg-core: Add a limited read only `revlog` implementation...
r46097 }
}
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 /// Calculate the hash of a revision given its data and its parents.
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
// Returns the SHA-1 digest of the two parent hashes followed by `data`.
r48171 fn hash(
data: &[u8],
p1_hash: &[u8],
p2_hash: &[u8],
) -> [u8; NODE_BYTES_LENGTH] {
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 let mut hasher = Sha1::new();
let (a, b) = (p1_hash, p2_hash);
// The parents are fed in byte-wise sorted order (smaller first), so the
// digest does not depend on which parent is p1 and which is p2.
if a > b {
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 hasher.update(b);
hasher.update(a);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 } else {
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 hasher.update(a);
hasher.update(b);
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 }
Simon Sapin
rust: Use a maintained crate for SHA-1 hashing...
r48171 hasher.update(data);
*hasher.finalize().as_ref()
Antoine Cezar
hg-core: check data integrity in `Revlog`...
r46102 }
Martin von Zweigbergk
rust-revlog: add tests for p1/p2 getters, as promised in D12442...
r49985
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
    use itertools::Itertools;

    /// Asserts that `entry` is revision `rev` with node `node` and that
    /// every parent accessor reports "no parent".
    fn assert_parentless(entry: &RevlogEntry, rev: Revision, node: Node) {
        assert_eq!(entry.revision(), rev);
        assert_eq!(*entry.node(), node);
        assert!(!entry.has_p1());
        assert_eq!(entry.p1(), None);
        assert_eq!(entry.p2(), None);
        assert!(entry.p1_entry().unwrap().is_none());
        assert!(entry.p2_entry().unwrap().is_none());
    }

    /// An index file with no bytes opens as a revlog without revisions.
    #[test]
    fn test_empty() {
        let dir = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: dir.path() };
        std::fs::write(dir.path().join("foo.i"), b"").unwrap();

        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        assert!(revlog.is_empty());
        assert_eq!(revlog.len(), 0);
        assert!(revlog.get_entry(0).is_err());
        assert!(!revlog.has_rev(0));
    }

    /// Three inline entries: two without parents and one whose parents are
    /// the first two.
    #[test]
    fn test_inline() {
        let dir = tempfile::tempdir().unwrap();
        let vfs = Vfs { base: dir.path() };

        let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
            .unwrap();
        let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
            .unwrap();
        let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
            .unwrap();

        let first = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(1)
            .with_inline(true)
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node0)
            .build();
        let second = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_node(node1)
            .build();
        let third = IndexEntryBuilder::new()
            .with_offset(INDEX_ENTRY_SIZE)
            .with_p1(0)
            .with_p2(1)
            .with_node(node2)
            .build();
        let index_bytes = vec![first, second, third]
            .into_iter()
            .flatten()
            .collect_vec();
        std::fs::write(dir.path().join("foo.i"), index_bytes).unwrap();

        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();

        let entry0 = revlog.get_entry(0).ok().unwrap();
        assert_parentless(&entry0, 0, node0);

        let entry1 = revlog.get_entry(1).ok().unwrap();
        assert_parentless(&entry1, 1, node1);

        let entry2 = revlog.get_entry(2).ok().unwrap();
        assert_eq!(entry2.revision(), 2);
        assert_eq!(*entry2.node(), node2);
        assert!(entry2.has_p1());
        assert_eq!(entry2.p1(), Some(0));
        assert_eq!(entry2.p2(), Some(1));
        let first_parent = entry2.p1_entry().unwrap();
        assert_eq!(first_parent.map(|parent| parent.revision()), Some(0));
        let second_parent = entry2.p2_entry().unwrap();
        assert_eq!(second_parent.map(|parent| parent.revision()), Some(1));
    }
}