upstream/mercurial-mirror Commit - r49373:f2f57724

rhg: Add RevlogEntry::data that does delta resolution...

Simon Sapin -

r49373:f2f57724 default

parent child

rust/hg-core/src/operations/debugdata.rs

0 +1 -1

             // debugdata.rs
             //
             // Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
             //
             // This software may be used and distributed according to the terms of the
             // GNU General Public License version 2 or any later version.
             use crate::repo::Repo;
             use crate::revlog::revlog::{Revlog, RevlogError};
             /// Which revlog `debug_data` should read from.
             #[derive(Debug, Copy, Clone)]
             pub enum DebugDataKind {
                 /// The changelog, stored in `00changelog.i`.
                 Changelog,
                 /// The manifest, stored in `00manifest.i`.
                 Manifest,
             }
             /// Dump the contents data of a revision.
             ///
             /// Opens the revlog selected by `kind`, resolves `revset` to a revision
             /// number with `resolve_rev_number_or_hex_prefix`, and returns the data
             /// of that revision as an owned byte vector.
             ///
             /// # Errors
             ///
             /// Any failure to open the revlog, to resolve `revset`, or to read the
             /// revision data is propagated as a `RevlogError`.
             pub fn debug_data(
                 repo: &Repo,
                 revset: &str,
                 kind: DebugDataKind,
             ) -> Result<Vec<u8>, RevlogError> {
                 let index_file = match kind {
                     DebugDataKind::Changelog => "00changelog.i",
                     DebugDataKind::Manifest => "00manifest.i",
                 };
                 // Only the index file is named; no explicit data file path is given.
                 let revlog = Revlog::open(repo, index_file, None)?;
                 let rev =
                     crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
                 let data = revlog.get_rev_data(rev)?;
-                Ok(data)
+                Ok(data.into_owned())
             }

rust/hg-core/src/revlog/changelog.rs

0 +1 -1

             use crate::errors::HgError;
             use crate::repo::Repo;
             use crate::revlog::node::NULL_NODE;
             use crate::revlog::revlog::{Revlog, RevlogError};
             use crate::revlog::Revision;
             use crate::revlog::{Node, NodePrefix};
             /// A specialized `Revlog` to work with `changelog` data format.
             pub struct Changelog {
                 /// The generic `revlog` format.
                 pub(crate) revlog: Revlog,
             }
             impl Changelog {
                 /// Open the `changelog` of the given repository.
                 ///
                 /// Reads the `00changelog.i` revlog; no separate data file path is
                 /// passed to `Revlog::open`.
                 pub fn open(repo: &Repo) -> Result<Self, HgError> {
                     let revlog = Revlog::open(repo, "00changelog.i", None)?;
                     Ok(Self { revlog })
                 }
                 /// Return the `ChangelogRevisionData` for the given node ID (or node
                 /// ID prefix).
                 ///
                 /// The node is first resolved to a revision number; any resolution
                 /// error is propagated as a `RevlogError`.
                 pub fn data_for_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<ChangelogRevisionData, RevlogError> {
                     let rev = self.revlog.rev_from_node(node)?;
                     self.data_for_rev(rev)
                 }
                 /// Return the `ChangelogRevisionData` of the given revision number.
                 pub fn data_for_rev(
                     &self,
                     rev: Revision,
                 ) -> Result<ChangelogRevisionData, RevlogError> {
-                    let bytes = self.revlog.get_rev_data(rev)?;
+                    let bytes = self.revlog.get_rev_data(rev)?.into_owned();
                     Ok(ChangelogRevisionData { bytes })
                 }
                 /// Return the node ID of the given revision number, as reported by
                 /// the underlying revlog, or `None` if the revlog does not know it.
                 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
                     self.revlog.node_from_rev(rev)
                 }
             }
             /// The data of one `changelog` revision, able to interpret its raw bytes.
             #[derive(Debug)]
             pub struct ChangelogRevisionData {
                 /// The raw bytes of the `changelog` revision.
                 bytes: Vec<u8>,
             }
             impl ChangelogRevisionData {
                 /// Iterate over the non-empty, newline-separated lines of the entry.
                 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
                     self.bytes
                         .split(|&byte| byte == b'\n')
                         .filter(|line| !line.is_empty())
                 }
                 /// Return the node ID of the `manifest` referenced by this `changelog`
                 /// entry.
                 ///
                 /// The first non-empty line is parsed as a hexadecimal node ID. An
                 /// entry without any such line yields `NULL_NODE`.
                 pub fn manifest_node(&self) -> Result<Node, HgError> {
                     if let Some(first_line) = self.lines().next() {
                         Node::from_hex_for_repo(first_line)
                     } else {
                         Ok(NULL_NODE)
                     }
                 }
             }

rust/hg-core/src/revlog/filelog.rs

0 +1 -1

             use crate::errors::HgError;
             use crate::repo::Repo;
             use crate::revlog::path_encode::path_encode;
             use crate::revlog::revlog::{Revlog, RevlogError};
             use crate::revlog::NodePrefix;
             use crate::revlog::Revision;
             use crate::utils::files::get_path_from_bytes;
             use crate::utils::hg_path::HgPath;
             use crate::utils::SliceExt;
             use std::path::PathBuf;
             /// A specialized `Revlog` to work with file data logs.
             pub struct Filelog {
                 /// The generic `revlog` format.
                 revlog: Revlog,
             }
             impl Filelog {
                 /// Open the filelog of the tracked file `file_path`.
                 ///
                 /// The index and data file paths are both derived from `file_path`
                 /// through `store_path`, with the `.i` and `.d` suffixes respectively.
                 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
                     let index_path = store_path(file_path, b".i");
                     let data_path = store_path(file_path, b".d");
                     let revlog = Revlog::open(repo, index_path, Some(&data_path))?;
                     Ok(Self { revlog })
                 }
                 /// The given node ID is that of the file as found in a manifest, not of a
                 /// changeset.
                 ///
                 /// The node is resolved to a filelog revision number first; any
                 /// resolution error is propagated as a `RevlogError`.
                 pub fn data_for_node(
                     &self,
                     file_node: impl Into<NodePrefix>,
                 ) -> Result<FilelogRevisionData, RevlogError> {
                     let file_rev = self.revlog.rev_from_node(file_node.into())?;
                     self.data_for_rev(file_rev)
                 }
                 /// The given revision is that of the file as found in a manifest, not of a
                 /// changeset.
                 ///
                 /// Returns the revision data (metadata included, if any) wrapped in a
                 /// `FilelogRevisionData`.
                 pub fn data_for_rev(
                     &self,
                     file_rev: Revision,
                 ) -> Result<FilelogRevisionData, RevlogError> {
-                    let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?;
+                    let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
                     Ok(FilelogRevisionData(data.into()))
                 }
             }
             /// Build the store path of a tracked file: `data/<hg_path><suffix>`, run
             /// through `path_encode` and converted to a platform path.
             fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
                 let unencoded = [b"data/", hg_path.as_bytes(), suffix].concat();
                 let encoded = path_encode(&unencoded);
                 PathBuf::from(get_path_from_bytes(&encoded))
             }
             /// The data for one revision in a filelog, uncompressed and delta-resolved.
             pub struct FilelogRevisionData(Vec<u8>);
             impl FilelogRevisionData {
                 /// Split the revision into its optional metadata and its data.
                 ///
                 /// Metadata is present when the revision starts with the two bytes
                 /// `\x01\n`; it then extends up to the next occurrence of that same
                 /// delimiter. A revision that opens the metadata block without ever
                 /// closing it is reported as corrupted.
                 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
                     const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
                     let after_opening = match self.0.drop_prefix(DELIMITER) {
                         Some(rest) => rest,
                         // No opening delimiter: the whole revision is file data.
                         None => return Ok((None, &self.0)),
                     };
                     match after_opening.split_2_by_slice(DELIMITER) {
                         Some((metadata, data)) => Ok((Some(metadata), data)),
                         None => Err(HgError::corrupted(
                             "Missing metadata end delimiter in filelog entry",
                         )),
                     }
                 }
                 /// Returns the file contents at this revision, stripped of any metadata
                 pub fn file_data(&self) -> Result<&[u8], HgError> {
                     self.split().map(|(_metadata, data)| data)
                 }
                 /// Consume the entry, and convert it into data, discarding any metadata,
                 /// if present.
                 ///
                 /// Without metadata the underlying buffer is returned as is; with
                 /// metadata the data part is copied into a new vector.
                 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
                     match self.split()? {
                         (Some(_metadata), data) => Ok(data.to_owned()),
                         (None, _) => Ok(self.0),
                     }
                 }
             }

rust/hg-core/src/revlog/manifest.rs

0 +1 -1

             use crate::errors::HgError;
             use crate::repo::Repo;
             use crate::revlog::revlog::{Revlog, RevlogError};
             use crate::revlog::Revision;
             use crate::revlog::{Node, NodePrefix};
             use crate::utils::hg_path::HgPath;
             use crate::utils::SliceExt;
             /// A specialized `Revlog` to work with `manifest` data format.
             pub struct Manifestlog {
                 /// The generic `revlog` format.
                 revlog: Revlog,
             }
             impl Manifestlog {
                 /// Open the `manifest` of the given repository.
                 ///
                 /// Reads the `00manifest.i` revlog; no separate data file path is
                 /// passed to `Revlog::open`.
                 pub fn open(repo: &Repo) -> Result<Self, HgError> {
                     let revlog = Revlog::open(repo, "00manifest.i", None)?;
                     Ok(Self { revlog })
                 }
                 /// Return the `Manifest` for the given node ID.
                 ///
                 /// Note: this is a node ID in the manifestlog, typically found through
                 /// `ChangelogRevisionData::manifest_node`. It is *not* the node ID of
                 /// any changeset.
                 ///
                 /// See also `Repo::manifest_for_node`
                 pub fn data_for_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Manifest, RevlogError> {
                     let rev = self.revlog.rev_from_node(node)?;
                     self.data_for_rev(rev)
                 }
                 /// Return the `Manifest` of a given revision number.
                 ///
                 /// Note: this is a revision number in the manifestlog, *not* of any
                 /// changeset.
                 ///
                 /// See also `Repo::manifest_for_rev`
                 pub fn data_for_rev(
                     &self,
                     rev: Revision,
                 ) -> Result<Manifest, RevlogError> {
-                    let bytes = self.revlog.get_rev_data(rev)?;
+                    let bytes = self.revlog.get_rev_data(rev)?.into_owned();
                     Ok(Manifest { bytes })
                 }
             }
             /// The contents of one manifest revision, able to interpret its raw bytes.
             #[derive(Debug)]
             pub struct Manifest {
                 /// Format for a manifest: flat sequence of variable-size entries,
                 /// sorted by path, each as:
                 ///
                 /// ```text
                 /// <path> \0 <hex_node_id> <flags> \n
                 /// ```
                 ///
                 /// The last entry is also terminated by a newline character.
                 /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
                 bytes: Vec<u8>,
             }
             impl Manifest {
                 /// Iterate over the entries of this manifest, in storage order.
                 ///
                 /// Each non-empty line is parsed lazily; a malformed line yields an
                 /// `Err` item.
                 pub fn iter(
                     &self,
                 ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
                     self.bytes
                         .split(|&byte| byte == b'\n')
                         .filter(|line| !line.is_empty())
                         .map(ManifestEntry::from_raw)
                 }
                 /// If the given path is in this manifest, return its filelog node ID
                 pub fn find_by_path(
                     &self,
                     path: &HgPath,
                 ) -> Result<Option<ManifestEntry>, HgError> {
                     use std::cmp::Ordering;
                     let wanted = path.as_bytes();
                     // Invariant: both ends of `window` sit on an entry boundary.
                     let mut window: &[u8] = &self.bytes;
                     // Binary search modelled on `[T]::binary_search_by`
                     // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
                     // There is no slice of entries to index into, so each step
                     // jumps to the middle of the byte window and looks for the
                     // newlines delimiting the entry found there.
                     while let Some(range) = Self::find_entry_near_middle_of(window)? {
                         let (entry_path, rest) =
                             ManifestEntry::split_path(&window[range.clone()])?;
                         match entry_path.cmp(wanted) {
                             Ordering::Less => {
                                 // Resume just after the newline ending this entry.
                                 let next_entry = range.end + 1;
                                 window = &window[next_entry..];
                             }
                             Ordering::Greater => {
                                 window = &window[..range.start];
                             }
                             Ordering::Equal => {
                                 let entry = ManifestEntry::from_path_and_rest(
                                     entry_path, rest,
                                 );
                                 return Ok(Some(entry));
                             }
                         }
                     }
                     Ok(None)
                 }
                 /// If there is at least one, return the byte range of an entry *excluding*
                 /// the final newline.
                 ///
                 /// The entry chosen is the one containing the middle byte of `bytes`.
                 /// Empty input yields `Ok(None)`; input whose second half holds no
                 /// newline is reported as corrupted.
                 fn find_entry_near_middle_of(
                     bytes: &[u8],
                 ) -> Result<Option<std::ops::Range<usize>>, HgError> {
                     if bytes.is_empty() {
                         return Ok(None);
                     }
                     // Integer division rounds down, so `middle < bytes.len()` and
                     // `after` is never empty.
                     let middle = bytes.len() / 2;
                     let (before, after) = bytes.split_at(middle);
                     // The entry starts right after the closest preceding newline,
                     // or at offset 0 when it is the first entry of `bytes`.
                     let entry_start = before
                         .iter()
                         .rposition(|&byte| byte == b'\n')
                         .map_or(0, |newline| newline + 1);
                     // In a well-formed manifest every entry ends with a newline, so
                     // `after` must contain at least the newline of the last entry.
                     // The newline itself is left out of the returned range.
                     let entry_end = after
                         .iter()
                         .position(|&byte| byte == b'\n')
                         .map(|offset| middle + offset)
                         .ok_or_else(|| {
                             HgError::corrupted(
                                 "manifest entry without \\n delimiter",
                             )
                         })?;
                     Ok(Some(entry_start..entry_end))
                 }
             }
             /// One entry of a `Manifest`, borrowing its fields from the manifest bytes.
             #[derive(Debug)]
             pub struct ManifestEntry<'manifest> {
                 /// Path of the file this entry describes
                 pub path: &'manifest HgPath,
                 /// Hexadecimal node ID, still unparsed; see `node_id`
                 pub hex_node_id: &'manifest [u8],
                 /// `Some` values are `b'x'`, `b'l'`, or `b't'`
                 pub flags: Option<u8>,
             }
             impl<'a> ManifestEntry<'a> {
                 /// Split a raw entry (without its final newline) at the first NUL
                 /// byte into the path and everything that follows it.
                 fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
                     match bytes.split_2(b'\0') {
                         Some(parts) => Ok(parts),
                         None => Err(HgError::corrupted(
                             "manifest entry without \\0 delimiter",
                         )),
                     }
                 }
                 /// Build an entry from the two halves returned by `split_path`.
                 ///
                 /// A trailing `x`, `l` or `t` byte in `rest` is taken as the flag;
                 /// what remains is the hexadecimal node ID.
                 fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
                     let (hex_node_id, flags) = match rest.split_last() {
                         Some((&flag, hex))
                             if matches!(flag, b'x' | b'l' | b't') =>
                         {
                             (hex, Some(flag))
                         }
                         _ => (rest, None),
                     };
                     Self {
                         path: HgPath::new(path),
                         hex_node_id,
                         flags,
                     }
                 }
                 /// Parse one raw manifest entry (without its final newline).
                 fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
                     Self::split_path(bytes)
                         .map(|(path, rest)| Self::from_path_and_rest(path, rest))
                 }
                 /// Parse the hexadecimal node ID of this entry.
                 pub fn node_id(&self) -> Result<Node, HgError> {
                     Node::from_hex_for_repo(self.hex_node_id)
                 }
             }

rust/hg-core/src/revlog/revlog.rs

0 +55 -45

             use std::borrow::Cow;
             use std::io::Read;
             use std::ops::Deref;
             use std::path::Path;
             use flate2::read::ZlibDecoder;
             use micro_timer::timed;
             use sha1::{Digest, Sha1};
             use zstd;
             use super::index::Index;
             use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
             use super::nodemap;
             use super::nodemap::{NodeMap, NodeMapError};
             use super::nodemap_docket::NodeMapDocket;
             use super::patch;
             use crate::errors::HgError;
             use crate::repo::Repo;
             use crate::revlog::Revision;
             use crate::{Node, NULL_REVISION};
             /// Errors that can occur while reading a revlog.
             #[derive(derive_more::From)]
             pub enum RevlogError {
                 /// The requested revision or node was not found in the revlog
                 InvalidRevision,
                 /// Working directory is not supported
                 WDirUnsupported,
                 /// Found more than one entry whose ID match the requested prefix
                 AmbiguousPrefix,
                 /// Any other error, including corrupted data
                 #[from]
                 Other(HgError),
             }
             impl From<NodeMapError> for RevlogError {
                 /// A nodemap that points at a revision missing from the index is
                 /// reported as corruption; several matches as an ambiguous prefix.
                 fn from(error: NodeMapError) -> Self {
                     match error {
                         NodeMapError::RevisionNotInIndex(_) => Self::corrupted(),
                         NodeMapError::MultipleResults => Self::AmbiguousPrefix,
                     }
                 }
             }
             impl RevlogError {
                 /// The generic "corrupted revlog" error.
                 fn corrupted() -> Self {
                     Self::Other(HgError::corrupted("corrupted revlog"))
                 }
             }
             /// Read only implementation of revlog.
             pub struct Revlog {
                 /// When index and data are not interleaved: bytes of the revlog index.
                 /// When index and data are interleaved: bytes of the revlog index and
                 /// data.
                 index: Index,
                 /// When index and data are not interleaved: bytes of the revlog data.
                 /// `None` when the index is inline (index and data interleaved).
                 data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
                 /// When present on disk: the persistent nodemap for this revlog.
                 /// Never loaded for an inline index.
                 nodemap: Option<nodemap::NodeTree>,
             }
             impl Revlog {
                 /// Open a revlog index file.
                 ///
                 /// It will also open the associated data file if index and data are not
                 /// interleaved.
                 #[timed]
                 pub fn open(
                     repo: &Repo,
                     index_path: impl AsRef<Path>,
                     data_path: Option<&Path>,
                 ) -> Result<Self, HgError> {
                     let index_path = index_path.as_ref();
                     let index = {
                         match repo.store_vfs().mmap_open_opt(&index_path)? {
                             None => Index::new(Box::new(vec![])),
                             Some(index_mmap) => {
                                 let index = Index::new(Box::new(index_mmap))?;
                                 Ok(index)
                             }
                         }
                     }?;
                     let default_data_path = index_path.with_extension("d");
                     // type annotation required
                     // won't recognize Mmap as Deref<Target = [u8]>
                     let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
                         if index.is_inline() {
                             None
                         } else {
                             let data_path = data_path.unwrap_or(&default_data_path);
                             let data_mmap = repo.store_vfs().mmap_open(data_path)?;
                             Some(Box::new(data_mmap))
                         };
                     let nodemap = if index.is_inline() {
                         None
                     } else {
                         NodeMapDocket::read_from_file(repo, index_path)?.map(
                             |(docket, data)| {
                                 nodemap::NodeTree::load_bytes(
                                     Box::new(data),
                                     docket.data_length,
                                 )
                             },
                         )
                     };
                     Ok(Revlog {
                         index,
                         data_bytes,
                         nodemap,
                     })
                 }
                 /// Return the number of entries of the `Revlog`, as counted by its
                 /// index.
                 pub fn len(&self) -> usize {
                     self.index.len()
                 }
                 /// Returns `true` if the index of this `Revlog` reports zero entries.
                 pub fn is_empty(&self) -> bool {
                     self.index.is_empty()
                 }
                 /// Returns the node ID for the given revision number, if it exists in this
                 /// revlog
                 ///
                 /// The null revision always maps to the null node.
                 pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> {
                     if rev == NULL_REVISION {
                         Some(&NULL_NODE)
                     } else {
                         self.index.get_entry(rev).map(|entry| entry.hash())
                     }
                 }
                 /// Return the revision number for the given node ID (or node ID
                 /// prefix), if it exists in this revlog
                 ///
                 /// A prefix of the null node resolves to `NULL_REVISION`. Otherwise
                 /// the persistent nodemap is used when present, falling back to a
                 /// linear scan of the index from the newest revision to the oldest.
                 ///
                 /// # Errors
                 ///
                 /// * `RevlogError::InvalidRevision` if no revision matches.
                 /// * `RevlogError::AmbiguousPrefix` if several revisions match the
                 ///   prefix.
                 /// * A corruption error if the index lacks an entry for a revision
                 ///   number below `self.len()`.
                 #[timed]
                 pub fn rev_from_node(
                     &self,
                     node: NodePrefix,
                 ) -> Result<Revision, RevlogError> {
                     if node.is_prefix_of(&NULL_NODE) {
                         return Ok(NULL_REVISION);
                     }
                     if let Some(nodemap) = &self.nodemap {
                         return nodemap
                             .find_bin(&self.index, node)?
                             .ok_or(RevlogError::InvalidRevision);
                     }
                     // Fallback to linear scan when a persistent nodemap is not present.
                     // This happens when the persistent-nodemap experimental feature is not
                     // enabled, or for small revlogs.
                     //
                     // TODO: consider building a non-persistent nodemap in memory to
                     // optimize these cases.
                     let mut found_by_prefix = None;
                     for rev in (0..self.len() as Revision).rev() {
                         // `ok_or_else` builds the error only on failure, instead of
                         // constructing (and dropping) one for every revision scanned.
                         let index_entry =
                             self.index.get_entry(rev).ok_or_else(|| {
                                 HgError::corrupted(
                                     "revlog references a revision not in the index",
                                 )
                             })?;
                         // An exact match wins immediately, even over an earlier
                         // prefix-only match.
                         if node == *index_entry.hash() {
                             return Ok(rev);
                         }
                         if node.is_prefix_of(index_entry.hash()) {
                             if found_by_prefix.is_some() {
                                 return Err(RevlogError::AmbiguousPrefix);
                             }
                             found_by_prefix = Some(rev)
                         }
                     }
                     found_by_prefix.ok_or(RevlogError::InvalidRevision)
                 }
                 /// Returns whether the given revision exists in this revlog, i.e.
                 /// whether the index has an entry for it.
                 pub fn has_rev(&self, rev: Revision) -> bool {
                     self.index.get_entry(rev).is_some()
                 }
                 /// Return the full data associated to a revision.
                 ///
                 /// All entries required to build the final data out of deltas will be
                 /// retrieved as needed, and the deltas will be applied to the initial
                 /// snapshot to rebuild the final data.
                 ///
                 /// The null revision yields empty data.
                 #[timed]
-                pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
+                pub fn get_rev_data(
+                    &self,
+                    rev: Revision,
+                ) -> Result<Cow<[u8]>, RevlogError> {
                     if rev == NULL_REVISION {
-                        return Ok(vec![]);
+                        return Ok(Cow::Borrowed(&[]));
                     };
-                    // Todo return -> Cow
+                    self.get_entry(rev)?.data()
-                    let mut entry = self.get_entry(rev)?;
-                    let mut delta_chain = vec![];
-                    // The meaning of `base_rev_or_base_of_delta_chain` depends on
-                    // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
-                    // `mercurial/revlogutils/constants.py` and the code in
-                    // [_chaininfo] and in [index_deltachain].
-                    let uses_generaldelta = self.index.uses_generaldelta();
-                    while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
-                        let base_rev = if uses_generaldelta {
-                            base_rev
-                        } else {
-                            entry.rev - 1
-                        };
-                        delta_chain.push(entry);
-                        entry = self.get_entry_internal(base_rev)?;
-                    // TODO do not look twice in the index
-                    let index_entry = self
-                        .index
-                        .get_entry(rev)
-                        .ok_or(RevlogError::InvalidRevision)?;
-                    let data: Vec<u8> = if delta_chain.is_empty() {
-                        entry.data_chunk()?.into()
-                    } else {
-                        Revlog::build_data_from_deltas(entry, &delta_chain)?
-                    };
-                    if self.check_hash(
-                        index_entry.p1(),
-                        index_entry.p2(),
-                        index_entry.hash().as_bytes(),
-                        &data,
-                    ) {
-                        Ok(data)
-                    } else {
-                        Err(RevlogError::corrupted())
                 }
                 /// Check the hash of some given data against the recorded hash.
                 pub fn check_hash(
                     &self,
                     p1: Revision,
                     p2: Revision,
                     expected: &[u8],
                     data: &[u8],
                 ) -> bool {
                     let e1 = self.index.get_entry(p1);
                     let h1 = match e1 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     let e2 = self.index.get_entry(p2);
                     let h2 = match e2 {
                         Some(ref entry) => entry.hash(),
                         None => &NULL_NODE,
                     };
                     &hash(data, h1.as_bytes(), h2.as_bytes()) == expected
                 }
                 /// Build the full data of a revision out of its snapshot
                 /// and its deltas.
                 ///
                 /// `deltas` is walked in reverse order. The resulting patch lists
                 /// are folded into a single patch, which is then applied to the
                 /// snapshot.
                 #[timed]
                 fn build_data_from_deltas(
                     snapshot: RevlogEntry,
                     deltas: &[RevlogEntry],
                 ) -> Result<Vec<u8>, RevlogError> {
                     let base = snapshot.data_chunk()?;
                     // Stop at the first chunk that fails to decode.
                     let mut chunks: Vec<Cow<'_, [u8]>> =
                         Vec::with_capacity(deltas.len());
                     for delta in deltas.iter().rev() {
                         chunks.push(delta.data_chunk()?);
                     }
                     let patch_lists: Vec<_> = chunks
                         .iter()
                         .map(|chunk| patch::PatchList::new(chunk))
                         .collect();
                     let folded = patch::fold_patch_lists(&patch_lists);
                     Ok(folded.apply(&base))
                 }
                 /// Return the revlog data.
                 fn data(&self) -> &[u8] {
                     match self.data_bytes {
                         Some(ref data_bytes) => &data_bytes,
                         None => panic!(
                             "forgot to load the data or trying to access inline data"
                         ),
                     }
                 }
                 /// Get an entry of the revlog.
                 ///
                 /// Looks `rev` up in the index and pairs the index information with
                 /// the slice of (still compressed) bytes it designates, taken from
                 /// the index itself for an inline revlog or from the data file
                 /// otherwise.
                 ///
                 /// # Errors
                 ///
                 /// `RevlogError::InvalidRevision` if the index has no entry for `rev`.
                 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
                     let index_entry = self
                         .index
                         .get_entry(rev)
                         .ok_or(RevlogError::InvalidRevision)?;
                     let start = index_entry.offset();
                     let end = start + index_entry.compressed_len();
                     // NOTE(review): `start..end` comes straight from the index entry;
                     // the slice indexing below panics if that range lies outside the
                     // data file. Confirm whether this should be a corruption error.
                     let data = if self.index.is_inline() {
                         self.index.data(start, end)
                     } else {
                         &self.data()[start..end]
                     };
                     let entry = RevlogEntry {
+                        revlog: self,
                         rev,
                         bytes: data,
                         compressed_len: index_entry.compressed_len(),
                         uncompressed_len: index_entry.uncompressed_len(),
                         // An entry whose recorded base is itself has no delta base.
                         base_rev_or_base_of_delta_chain: if index_entry
                             .base_revision_or_base_of_delta_chain()
                             == rev
                         {
                             None
                         } else {
                             Some(index_entry.base_revision_or_base_of_delta_chain())
                         },
                     };
                     Ok(entry)
                 }
                 /// Like `get_entry`, for revisions referenced from within the revlog
                 /// itself: any failure is reported as corruption rather than as
                 /// e.g. "invalid revision".
                 fn get_entry_internal(
                     &self,
                     rev: Revision,
                 ) -> Result<RevlogEntry, RevlogError> {
                     match self.get_entry(rev) {
                         Ok(entry) => Ok(entry),
                         Err(_) => Err(RevlogError::corrupted()),
                     }
                 }
             }
             /// The revlog entry's bytes and the necessary information to extract
             /// the entry's data.
-            #[derive(Debug)]
+            #[derive(Clone)]
             pub struct RevlogEntry<'a> {
+                revlog: &'a Revlog,
                 /// Revision number of this entry
                 rev: Revision,
                 /// Bytes of this entry as stored in the revlog (index offset up to
                 /// offset + compressed length)
                 bytes: &'a [u8],
                 /// Compressed length recorded in the index
                 compressed_len: usize,
                 /// Uncompressed length recorded in the index
                 uncompressed_len: usize,
                 /// `None` when the index records this revision itself as the base;
                 /// otherwise the value recorded in the index
                 base_rev_or_base_of_delta_chain: Option<Revision>,
             }
             impl<'a> RevlogEntry<'a> {
                 pub fn revision(&self) -> Revision {
                     self.rev
                 }
+                /// The data for this entry, after resolving deltas if any.
                 ///
                 /// Walks back from this entry through its delta bases until an entry
                 /// without a base is reached, rebuilds the data with
                 /// `Revlog::build_data_from_deltas` when at least one delta was
                 /// traversed, then validates the result with `check_hash` against
                 /// the parents and hash recorded in the index.
                 ///
                 /// # Errors
                 ///
                 /// Propagates errors from `get_entry_internal`, `data_chunk` and
                 /// `build_data_from_deltas`; returns
                 /// `RevlogError::InvalidRevision` when the index has no entry for
                 /// this revision, and `RevlogError::corrupted()` when the hash
                 /// check fails.
+                pub fn data(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
+                    let mut entry = self.clone();
+                    let mut delta_chain = vec![];
+                    // The meaning of `base_rev_or_base_of_delta_chain` depends on
+                    // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
+                    // `mercurial/revlogutils/constants.py` and the code in
+                    // [_chaininfo] and in [index_deltachain].
+                    let uses_generaldelta = self.revlog.index.uses_generaldelta();
                     // NOTE(review): nothing here bounds the length of the walk;
                     // presumably a well-formed index always reaches an entry with
                     // no base -- confirm the behavior on corrupted input.
+                    while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
                         // Without generaldelta the previous revision is used as
                         // the next link instead of the recorded base.
+                        let base_rev = if uses_generaldelta {
+                            base_rev
+                        } else {
+                            entry.rev - 1
+                        };
                         // Entries are pushed starting with this revision, so the
                         // chain is ordered from newest to oldest.
+                        delta_chain.push(entry);
+                        entry = self.revlog.get_entry_internal(base_rev)?;
+                    }
+                    // TODO do not look twice in the index
+                    let index_entry = self
+                        .revlog
+                        .index
+                        .get_entry(self.rev)
+                        .ok_or(RevlogError::InvalidRevision)?;
                     // An empty chain means the loop never ran, so `entry` is still
                     // a clone of `self` and its chunk is the data itself.
+                    let data = if delta_chain.is_empty() {
+                        entry.data_chunk()?
+                    } else {
+                        Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
+                    };
+                    if self.revlog.check_hash(
+                        index_entry.p1(),
+                        index_entry.p2(),
+                        index_entry.hash().as_bytes(),
+                        &data,
+                    ) {
+                        Ok(data)
+                    } else {
+                        Err(RevlogError::corrupted())
+                    }
+                }
                 /// Extract the data contained in the entry.
                 /// This may be a delta. (See `is_delta`.)
                 ///
                 /// The first stored byte selects the decoding: the bytes are
                 /// borrowed as-is for `\0`, borrowed without the first byte for
                 /// `u`, and decompressed into an owned buffer for zlib (`x`) and
                 /// zstd (`\x28`). An entry with no bytes yields an empty slice.
                 ///
                 /// # Errors
                 ///
                 /// Returns `RevlogError::corrupted()` for any other leading byte and
                 /// propagates errors from the decompression helpers.
-                fn data_chunk(&self) -> Result<Cow<'_, [u8]>, RevlogError> {
+                fn data_chunk(&self) -> Result<Cow<'a, [u8]>, RevlogError> {
                     // Checked first so that the `self.bytes[0]` below cannot panic.
                     if self.bytes.is_empty() {
                         return Ok(Cow::Borrowed(&[]));
                     }
                     match self.bytes[0] {
                         // Revision data is the entirety of the entry, including this
                         // header.
                         b'\0' => Ok(Cow::Borrowed(self.bytes)),
                         // Raw revision data follows.
                         b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
                         // zlib (RFC 1950) data.
                         b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
                         // zstd data.
                         b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
                         // A proper new format should have had a repo/store requirement.
                         _format_type => Err(RevlogError::corrupted()),
                     }
                 }
                 /// Decompress `self.bytes` as a zlib stream.
                 ///
                 /// For a delta the stream is read to its end into a buffer that
                 /// starts with `compressed_len` bytes of capacity; otherwise exactly
                 /// `uncompressed_len` bytes are read.
                 ///
                 /// # Errors
                 ///
                 /// Any read error from the decoder is reported as
                 /// `RevlogError::corrupted()`.
                 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> {
                     let mut inflater = ZlibDecoder::new(self.bytes);
                     let outcome = if self.is_delta() {
                         let mut out = Vec::with_capacity(self.compressed_len);
                         inflater.read_to_end(&mut out).map(|_| out)
                     } else {
                         let mut out = vec![0u8; self.uncompressed_len];
                         inflater.read_exact(&mut out).map(|()| out)
                     };
                     outcome.map_err(|_| RevlogError::corrupted())
                 }
                 /// Decompress `self.bytes` as zstd data.
                 ///
                 /// A delta is stream-decoded into a growable buffer that starts with
                 /// `compressed_len` bytes of capacity. Anything else is decoded as
                 /// a single block into a buffer of `uncompressed_len` bytes, and
                 /// the decoded length must match that size exactly.
                 ///
                 /// # Errors
                 ///
                 /// Returns `RevlogError::corrupted()` when decoding fails or when
                 /// the decoded length differs from `uncompressed_len`.
                 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> {
                     if !self.is_delta() {
                         let expected = self.uncompressed_len;
                         let mut out = vec![0; expected];
                         let written =
                             zstd::block::decompress_to_buffer(self.bytes, &mut out)
                                 .map_err(|_| RevlogError::corrupted())?;
                         return if written == expected {
                             Ok(out)
                         } else {
                             Err(RevlogError::corrupted())
                         };
                     }
                     let mut out = Vec::with_capacity(self.compressed_len);
                     zstd::stream::copy_decode(self.bytes, &mut out)
                         .map_err(|_| RevlogError::corrupted())?;
                     Ok(out)
                 }
                 /// Tell if the entry is a snapshot or a delta
                 /// (influences on decompression).
                 fn is_delta(&self) -> bool {
                     self.base_rev_or_base_of_delta_chain.is_some()
                 }
             }
             /// Compute the hash of a revision from its data and its parents' hashes.
             ///
             /// The two parent hashes are fed to SHA-1 in ascending order (when they
             /// compare equal, `p1_hash` goes first), followed by `data`.
             fn hash(
                 data: &[u8],
                 p1_hash: &[u8],
                 p2_hash: &[u8],
             ) -> [u8; NODE_BYTES_LENGTH] {
                 // Decide the parent order once, up front.
                 let (first, second) = if p1_hash > p2_hash {
                     (p2_hash, p1_hash)
                 } else {
                     (p1_hash, p2_hash)
                 };
                 let mut sha = Sha1::new();
                 sha.update(first);
                 sha.update(second);
                 sha.update(data);
                 *sha.finalize().as_ref()
             }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages