manifest.rs
209 lines
| 6.9 KiB
| application/rls-services+xml
|
RustLexer
Simon Sapin
|
r48777 | use crate::errors::HgError; | ||
Simon Sapin
|
r48778 | use crate::revlog::{Node, NodePrefix}; | ||
Raphaël Gomès
|
r50832 | use crate::revlog::{Revlog, RevlogError}; | ||
Antoine Cezar
|
r46104 | use crate::utils::hg_path::HgPath; | ||
Simon Sapin
|
r49166 | use crate::utils::SliceExt; | ||
Martin von Zweigbergk
|
r49981 | use crate::vfs::Vfs; | ||
Raphaël Gomès
|
r51871 | use crate::{Graph, GraphError, Revision, UncheckedRevision}; | ||
Antoine Cezar
|
r46104 | |||
/// A specialized `Revlog` to work with `manifest` data format. | ||||
Simon Sapin
|
r48771 | pub struct Manifestlog { | ||
Antoine Cezar
|
r46104 | /// The generic `revlog` format. | ||
revlog: Revlog, | ||||
} | ||||
Raphaël Gomès
|
r51871 | impl Graph for Manifestlog { | ||
fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { | ||||
self.revlog.parents(rev) | ||||
} | ||||
} | ||||
Simon Sapin
|
r48771 | impl Manifestlog { | ||
Antoine Cezar
|
r46104 | /// Open the `manifest` of a repository given by its root. | ||
Martin von Zweigbergk
|
r49981 | pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> { | ||
let revlog = | ||||
Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?; | ||||
Antoine Cezar
|
r46104 | Ok(Self { revlog }) | ||
} | ||||
Simon Sapin
|
r48783 | /// Return the `Manifest` for the given node ID. | ||
/// | ||||
/// Note: this is a node ID in the manifestlog, typically found through | ||||
/// `ChangelogEntry::manifest_node`. It is *not* the node ID of any | ||||
/// changeset. | ||||
/// | ||||
/// See also `Repo::manifest_for_node` | ||||
pub fn data_for_node( | ||||
&self, | ||||
node: NodePrefix, | ||||
) -> Result<Manifest, RevlogError> { | ||||
Simon Sapin
|
r48782 | let rev = self.revlog.rev_from_node(node)?; | ||
Raphaël Gomès
|
r51870 | self.data_for_checked_rev(rev) | ||
Antoine Cezar
|
r46104 | } | ||
Simon Sapin
|
r48783 | /// Return the `Manifest` of a given revision number. | ||
/// | ||||
/// Note: this is a revision number in the manifestlog, *not* of any | ||||
/// changeset. | ||||
/// | ||||
/// See also `Repo::manifest_for_rev` | ||||
pub fn data_for_rev( | ||||
&self, | ||||
Raphaël Gomès
|
r51870 | rev: UncheckedRevision, | ||
) -> Result<Manifest, RevlogError> { | ||||
let bytes = self.revlog.get_rev_data(rev)?.into_owned(); | ||||
Ok(Manifest { bytes }) | ||||
} | ||||
pub fn data_for_checked_rev( | ||||
&self, | ||||
Simon Sapin
|
r48783 | rev: Revision, | ||
) -> Result<Manifest, RevlogError> { | ||||
Raphaël Gomès
|
r51870 | let bytes = | ||
self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned(); | ||||
Simon Sapin
|
r48771 | Ok(Manifest { bytes }) | ||
Antoine Cezar
|
r46104 | } | ||
} | ||||
Simon Sapin
|
r48771 | /// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. | ||
Antoine Cezar
|
r46104 | #[derive(Debug)] | ||
Simon Sapin
|
r48771 | pub struct Manifest { | ||
Simon Sapin
|
r49324 | /// Format for a manifest: flat sequence of variable-size entries, | ||
/// sorted by path, each as: | ||||
/// | ||||
/// ```text | ||||
/// <path> \0 <hex_node_id> <flags> \n | ||||
/// ``` | ||||
/// | ||||
/// The last entry is also terminated by a newline character. | ||||
/// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`. | ||||
Antoine Cezar
|
r46104 | bytes: Vec<u8>, | ||
} | ||||
Simon Sapin
|
r48771 | impl Manifest { | ||
Simon Sapin
|
r49166 | pub fn iter( | ||
&self, | ||||
) -> impl Iterator<Item = Result<ManifestEntry, HgError>> { | ||||
Antoine Cezar
|
r46104 | self.bytes | ||
.split(|b| b == &b'\n') | ||||
.filter(|line| !line.is_empty()) | ||||
Simon Sapin
|
r49324 | .map(ManifestEntry::from_raw) | ||
Antoine Cezar
|
r46111 | } | ||
Simon Sapin
|
r48778 | |||
/// If the given path is in this manifest, return its filelog node ID | ||||
Simon Sapin
|
r49324 | pub fn find_by_path( | ||
Simon Sapin
|
r49166 | &self, | ||
path: &HgPath, | ||||
) -> Result<Option<ManifestEntry>, HgError> { | ||||
Simon Sapin
|
r49324 | use std::cmp::Ordering::*; | ||
let path = path.as_bytes(); | ||||
// Both boundaries of this `&[u8]` slice are always at the boundary of | ||||
// an entry | ||||
let mut bytes = &*self.bytes; | ||||
Simon Sapin
|
r49166 | |||
Simon Sapin
|
r49324 | // Binary search algorithm derived from `[T]::binary_search_by` | ||
// <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221> | ||||
// except we don’t have a slice of entries. Instead we jump to the | ||||
// middle of the byte slice and look around for entry delimiters | ||||
// (newlines). | ||||
while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? { | ||||
let (entry_path, rest) = | ||||
ManifestEntry::split_path(&bytes[entry_range.clone()])?; | ||||
let cmp = entry_path.cmp(path); | ||||
if cmp == Less { | ||||
let after_newline = entry_range.end + 1; | ||||
bytes = &bytes[after_newline..]; | ||||
} else if cmp == Greater { | ||||
bytes = &bytes[..entry_range.start]; | ||||
} else { | ||||
return Ok(Some(ManifestEntry::from_path_and_rest( | ||||
entry_path, rest, | ||||
))); | ||||
Simon Sapin
|
r48778 | } | ||
} | ||||
Ok(None) | ||||
} | ||||
Simon Sapin
|
r49324 | |||
/// If there is at least one, return the byte range of an entry *excluding* | ||||
/// the final newline. | ||||
fn find_entry_near_middle_of( | ||||
bytes: &[u8], | ||||
) -> Result<Option<std::ops::Range<usize>>, HgError> { | ||||
let len = bytes.len(); | ||||
if len > 0 { | ||||
let middle = bytes.len() / 2; | ||||
// Integer division rounds down, so `middle < len`. | ||||
let (before, after) = bytes.split_at(middle); | ||||
let is_newline = |&byte: &u8| byte == b'\n'; | ||||
let entry_start = match before.iter().rposition(is_newline) { | ||||
Some(i) => i + 1, | ||||
None => 0, // We choose the first entry in `bytes` | ||||
}; | ||||
let entry_end = match after.iter().position(is_newline) { | ||||
Some(i) => { | ||||
// No `+ 1` here to exclude this newline from the range | ||||
middle + i | ||||
} | ||||
None => { | ||||
// In a well-formed manifest: | ||||
// | ||||
// * Since `len > 0`, `bytes` contains at least one entry | ||||
// * Every entry ends with a newline | ||||
// * Since `middle < len`, `after` contains at least the | ||||
// newline at the end of the last entry of `bytes`. | ||||
// | ||||
// We didn’t find a newline, so this manifest is not | ||||
// well-formed. | ||||
return Err(HgError::corrupted( | ||||
"manifest entry without \\n delimiter", | ||||
)); | ||||
} | ||||
}; | ||||
Ok(Some(entry_start..entry_end)) | ||||
} else { | ||||
// len == 0 | ||||
Ok(None) | ||||
} | ||||
} | ||||
Antoine Cezar
|
r46104 | } | ||
Simon Sapin
|
r49166 | |||
/// `Manifestlog` entry which knows how to interpret the `manifest` data bytes. | ||||
#[derive(Debug)] | ||||
pub struct ManifestEntry<'manifest> { | ||||
pub path: &'manifest HgPath, | ||||
pub hex_node_id: &'manifest [u8], | ||||
/// `Some` values are b'x', b'l', or 't' | ||||
pub flags: Option<u8>, | ||||
} | ||||
Simon Sapin
|
r49324 | impl<'a> ManifestEntry<'a> { | ||
fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> { | ||||
bytes.split_2(b'\0').ok_or_else(|| { | ||||
HgError::corrupted("manifest entry without \\0 delimiter") | ||||
}) | ||||
} | ||||
fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self { | ||||
let (hex_node_id, flags) = match rest.split_last() { | ||||
Some((&b'x', rest)) => (rest, Some(b'x')), | ||||
Some((&b'l', rest)) => (rest, Some(b'l')), | ||||
Some((&b't', rest)) => (rest, Some(b't')), | ||||
_ => (rest, None), | ||||
}; | ||||
Self { | ||||
path: HgPath::new(path), | ||||
hex_node_id, | ||||
flags, | ||||
} | ||||
} | ||||
fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> { | ||||
let (path, rest) = Self::split_path(bytes)?; | ||||
Ok(Self::from_path_and_rest(path, rest)) | ||||
} | ||||
Simon Sapin
|
r49166 | pub fn node_id(&self) -> Result<Node, HgError> { | ||
Node::from_hex_for_repo(self.hex_node_id) | ||||
} | ||||
} | ||||