filelog.rs
200 lines
| 7.2 KiB
| application/rls-services+xml
|
RustLexer
Simon Sapin
|
r48775 | use crate::errors::HgError; | ||
use crate::repo::Repo; | ||||
use crate::revlog::path_encode::path_encode; | ||||
Simon Sapin
|
r49374 | use crate::revlog::revlog::RevlogEntry; | ||
Simon Sapin
|
r48775 | use crate::revlog::revlog::{Revlog, RevlogError}; | ||
use crate::revlog::NodePrefix; | ||||
use crate::revlog::Revision; | ||||
use crate::utils::files::get_path_from_bytes; | ||||
use crate::utils::hg_path::HgPath; | ||||
use crate::utils::SliceExt; | ||||
use std::path::PathBuf; | ||||
/// A specialized `Revlog` to work with file data logs. | ||||
pub struct Filelog { | ||||
/// The generic `revlog` format. | ||||
revlog: Revlog, | ||||
} | ||||
impl Filelog { | ||||
Simon Sapin
|
r48777 | pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> { | ||
Simon Sapin
|
r48775 | let index_path = store_path(file_path, b".i"); | ||
let data_path = store_path(file_path, b".d"); | ||||
let revlog = Revlog::open(repo, index_path, Some(&data_path))?; | ||||
Ok(Self { revlog }) | ||||
} | ||||
Simon Sapin
|
r49374 | /// The given node ID is that of the file as found in a filelog, not of a | ||
Simon Sapin
|
r48775 | /// changeset. | ||
Simon Sapin
|
r48783 | pub fn data_for_node( | ||
Simon Sapin
|
r48775 | &self, | ||
file_node: impl Into<NodePrefix>, | ||||
Simon Sapin
|
r49372 | ) -> Result<FilelogRevisionData, RevlogError> { | ||
Simon Sapin
|
r48782 | let file_rev = self.revlog.rev_from_node(file_node.into())?; | ||
Simon Sapin
|
r48783 | self.data_for_rev(file_rev) | ||
Simon Sapin
|
r48775 | } | ||
Simon Sapin
|
r49374 | /// The given revision is that of the file as found in a filelog, not of a | ||
Simon Sapin
|
r48775 | /// changeset. | ||
Simon Sapin
|
r48783 | pub fn data_for_rev( | ||
Simon Sapin
|
r48775 | &self, | ||
file_rev: Revision, | ||||
Simon Sapin
|
r49372 | ) -> Result<FilelogRevisionData, RevlogError> { | ||
Simon Sapin
|
r49373 | let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned(); | ||
Simon Sapin
|
r49372 | Ok(FilelogRevisionData(data.into())) | ||
Simon Sapin
|
r48775 | } | ||
Simon Sapin
|
r49374 | |||
/// The given node ID is that of the file as found in a filelog, not of a | ||||
/// changeset. | ||||
pub fn entry_for_node( | ||||
&self, | ||||
file_node: impl Into<NodePrefix>, | ||||
) -> Result<FilelogEntry, RevlogError> { | ||||
let file_rev = self.revlog.rev_from_node(file_node.into())?; | ||||
self.entry_for_rev(file_rev) | ||||
} | ||||
/// The given revision is that of the file as found in a filelog, not of a | ||||
/// changeset. | ||||
pub fn entry_for_rev( | ||||
&self, | ||||
file_rev: Revision, | ||||
) -> Result<FilelogEntry, RevlogError> { | ||||
Ok(FilelogEntry(self.revlog.get_entry(file_rev)?)) | ||||
} | ||||
Simon Sapin
|
r48775 | } | ||
fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf { | ||||
let encoded_bytes = | ||||
path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat()); | ||||
get_path_from_bytes(&encoded_bytes).into() | ||||
} | ||||
Simon Sapin
|
r49374 | pub struct FilelogEntry<'a>(RevlogEntry<'a>); | ||
impl FilelogEntry<'_> { | ||||
Simon Sapin
|
r49378 | /// `self.data()` can be expensive, with decompression and delta | ||
/// resolution. | ||||
/// | ||||
/// *Without* paying this cost, based on revlog index information | ||||
/// including `RevlogEntry::uncompressed_len`: | ||||
/// | ||||
/// * Returns `true` if the length that `self.data().file_data().len()` | ||||
/// would return is definitely **not equal** to `other_len`. | ||||
/// * Returns `false` if available information is inconclusive. | ||||
pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool { | ||||
// Relevant code that implement this behavior in Python code: | ||||
// basefilectx.cmp, filelog.size, storageutil.filerevisioncopied, | ||||
// revlog.size, revlog.rawsize | ||||
// Let’s call `file_data_len` what would be returned by | ||||
// `self.data().file_data().len()`. | ||||
if self.0.is_cencored() { | ||||
let file_data_len = 0; | ||||
return other_len != file_data_len; | ||||
} | ||||
if self.0.has_length_affecting_flag_processor() { | ||||
// We can’t conclude anything about `file_data_len`. | ||||
return false; | ||||
} | ||||
// Revlog revisions (usually) have metadata for the size of | ||||
// their data after decompression and delta resolution | ||||
// as would be returned by `Revlog::get_rev_data`. | ||||
// | ||||
// For filelogs this is the file’s contents preceded by an optional | ||||
// metadata block. | ||||
let uncompressed_len = if let Some(l) = self.0.uncompressed_len() { | ||||
l as u64 | ||||
} else { | ||||
// The field was set to -1, the actual uncompressed len is unknown. | ||||
// We need to decompress to say more. | ||||
return false; | ||||
}; | ||||
// `uncompressed_len = file_data_len + optional_metadata_len`, | ||||
// so `file_data_len <= uncompressed_len`. | ||||
if uncompressed_len < other_len { | ||||
// Transitively, `file_data_len < other_len`. | ||||
// So `other_len != file_data_len` definitely. | ||||
return true; | ||||
} | ||||
if uncompressed_len == other_len + 4 { | ||||
// It’s possible that `file_data_len == other_len` with an empty | ||||
// metadata block (2 start marker bytes + 2 end marker bytes). | ||||
// This happens when there wouldn’t otherwise be metadata, but | ||||
// the first 2 bytes of file data happen to match a start marker | ||||
// and would be ambiguous. | ||||
return false; | ||||
} | ||||
if !self.0.has_p1() { | ||||
// There may or may not be copy metadata, so we can’t deduce more | ||||
// about `file_data_len` without computing file data. | ||||
return false; | ||||
} | ||||
// Filelog ancestry is not meaningful in the way changelog ancestry is. | ||||
// It only provides hints to delta generation. | ||||
// p1 and p2 are set to null when making a copy or rename since | ||||
// contents are likely unrelatedto what might have previously existed | ||||
// at the destination path. | ||||
// | ||||
// Conversely, since here p1 is non-null, there is no copy metadata. | ||||
// Note that this reasoning may be invalidated in the presence of | ||||
// merges made by some previous versions of Mercurial that | ||||
// swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528> | ||||
// and `tests/test-issue6528.t`. | ||||
// | ||||
// Since copy metadata is currently the only kind of metadata | ||||
// kept in revlog data of filelogs, | ||||
// this `FilelogEntry` does not have such metadata: | ||||
let file_data_len = uncompressed_len; | ||||
return file_data_len != other_len; | ||||
} | ||||
Simon Sapin
|
r49374 | pub fn data(&self) -> Result<FilelogRevisionData, HgError> { | ||
Ok(FilelogRevisionData(self.0.data()?.into_owned())) | ||||
} | ||||
} | ||||
Simon Sapin
|
/// The bytes of one filelog revision, after decompression and delta
/// resolution.
pub struct FilelogRevisionData(Vec<u8>);
Simon Sapin
|
r48775 | |||
Simon Sapin
|
r49372 | impl FilelogRevisionData { | ||
Simon Sapin
|
r48775 | /// Split into metadata and data | ||
Arseniy Alekseyev
|
r49064 | pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> { | ||
Simon Sapin
|
r48775 | const DELIMITER: &[u8; 2] = &[b'\x01', b'\n']; | ||
if let Some(rest) = self.0.drop_prefix(DELIMITER) { | ||||
if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) { | ||||
Arseniy Alekseyev
|
r49064 | Ok((Some(metadata), data)) | ||
Simon Sapin
|
r48775 | } else { | ||
Err(HgError::corrupted( | ||||
"Missing metadata end delimiter in filelog entry", | ||||
)) | ||||
} | ||||
} else { | ||||
Ok((None, &self.0)) | ||||
} | ||||
} | ||||
/// Returns the file contents at this revision, stripped of any metadata | ||||
Simon Sapin
|
r49372 | pub fn file_data(&self) -> Result<&[u8], HgError> { | ||
Simon Sapin
|
r48775 | let (_metadata, data) = self.split()?; | ||
Ok(data) | ||||
} | ||||
Arseniy Alekseyev
|
r49051 | |||
/// Consume the entry, and convert it into data, discarding any metadata, | ||||
/// if present. | ||||
Simon Sapin
|
r49372 | pub fn into_file_data(self) -> Result<Vec<u8>, HgError> { | ||
Arseniy Alekseyev
|
r49064 | if let (Some(_metadata), data) = self.split()? { | ||
Arseniy Alekseyev
|
r49051 | Ok(data.to_owned()) | ||
} else { | ||||
Ok(self.0) | ||||
} | ||||
} | ||||
Simon Sapin
|
r48775 | } | ||